R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tswge)
library(nnfor)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(vars)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
## Loading required package: urca
## Loading required package: lmtest
us<-read.csv("/Data Folder/University/SMU/R for Time Series/Project/US_daily.csv",header=T)
head(us,2)
##       date states positive negative pending hospitalizedCurrently
## 1 20200715     56  3478419 39042608    2947                 56147
## 2 20200714     56  3413313 38351244     960                 55509
##   hospitalizedCumulative inIcuCurrently inIcuCumulative onVentilatorCurrently
## 1                 269507           6326           12002                  2322
## 2                 267127           6233           11857                  2263
##   onVentilatorCumulative recovered          dateChecked  death hospitalized
## 1                   1166   1075882 2020-07-15T00:00:00Z 129595       269507
## 2                   1154   1049098 2020-07-14T00:00:00Z 128740       267127
##           lastModified    total totalTestResults   posNeg deathIncrease
## 1 2020-07-15T00:00:00Z 42523974         42521027 42521027           855
## 2 2020-07-14T00:00:00Z 41765517         41764557 41764557           736
##   hospitalizedIncrease negativeIncrease positiveIncrease
## 1                 2380           691364            65106
## 2                 2262           697403            62879
##   totalTestResultsIncrease                                     hash
## 1                   756470 4e2fa5f3baa2968cef59f8395ece2f173d078291
## 2                   760282 dc2b411ce1df07a2a25be3afa90ad1e0402bd8d6
#plot daily test positive count
plotts.wge(us$positiveIncrease)

# exclude early days only sick people gets tested
us1<-subset.data.frame(us,date>20200303) # data looks reasonable
us2<-subset.data.frame(us,date>20200503) # ensure the test positive rate is reasonable, for example <10%
summary.data.frame(us2)
##       date              states      positive          negative       
##  Min.   :20200504   Min.   :56   Min.   :1185985   Min.   : 6141286  
##  1st Qu.:20200522   1st Qu.:56   1st Qu.:1607510   1st Qu.:11893006  
##  Median :20200609   Median :56   Median :1976557   Median :19116676  
##  Mean   :20200597   Mean   :56   Mean   :2090309   Mean   :20236873  
##  3rd Qu.:20200627   3rd Qu.:56   3rd Qu.:2499250   3rd Qu.:27947034  
##  Max.   :20200715   Max.   :56   Max.   :3478419   Max.   :39042608  
##     pending     hospitalizedCurrently hospitalizedCumulative inIcuCurrently 
##  Min.   : 960   Min.   :27770         Min.   :139220         Min.   : 5163  
##  1st Qu.:1891   1st Qu.:31468         1st Qu.:182385         1st Qu.: 5628  
##  Median :2208   Median :37259         Median :219642         Median : 6402  
##  Mean   :2466   Mean   :38138         Mean   :212301         Mean   : 7496  
##  3rd Qu.:2978   3rd Qu.:43004         3rd Qu.:239576         3rd Qu.: 9048  
##  Max.   :4084   Max.   :56147         Max.   :269507         Max.   :12696  
##  inIcuCumulative onVentilatorCurrently onVentilatorCumulative   recovered      
##  Min.   : 4579   Min.   :1982          Min.   : 430.0         Min.   : 186752  
##  1st Qu.: 7689   1st Qu.:2248          1st Qu.: 633.0         1st Qu.: 350135  
##  Median : 9141   Median :3090          Median : 771.0         Median : 609476  
##  Mean   : 8923   Mean   :3562          Mean   : 800.9         Mean   : 587703  
##  3rd Qu.:10415   3rd Qu.:4716          3rd Qu.: 977.0         3rd Qu.: 772465  
##  Max.   :12002   Max.   :7070          Max.   :1166.0         Max.   :1075882  
##  dateChecked            death         hospitalized    lastModified      
##  Length:73          Min.   : 63653   Min.   :139220   Length:73         
##  Class :character   1st Qu.: 90547   1st Qu.:182385   Class :character  
##  Mode  :character   Median :105838   Median :219642   Mode  :character  
##                     Mean   :103152   Mean   :212301                     
##                     3rd Qu.:118955   3rd Qu.:239576                     
##                     Max.   :129595   Max.   :269507                     
##      total          totalTestResults       posNeg         deathIncrease   
##  Min.   : 7330062   Min.   : 7327271   Min.   : 7327271   Min.   : 209.0  
##  1st Qu.:13504225   1st Qu.:13500516   1st Qu.:13500516   1st Qu.: 640.0  
##  Median :21094894   Median :21093233   Median :21093233   Median : 823.0  
##  Mean   :22329648   Mean   :22327182   Mean   :22327182   Mean   : 917.2  
##  3rd Qu.:30448470   3rd Qu.:30446284   3rd Qu.:30446284   3rd Qu.:1024.0  
##  Max.   :42523974   Max.   :42521027   Max.   :42521027   Max.   :2740.0  
##  hospitalizedIncrease negativeIncrease positiveIncrease
##  Min.   :-2841        Min.   :208933   Min.   :16629   
##  1st Qu.: 1040        1st Qu.:368410   1st Qu.:21319   
##  Median : 1435        Median :439702   Median :24701   
##  Mean   : 1807        Mean   :453565   Mean   :31712   
##  3rd Qu.: 1856        3rd Qu.:559083   3rd Qu.:41857   
##  Max.   :17320        Max.   :756730   Max.   :66645   
##  totalTestResultsIncrease     hash          
##  Min.   :231456           Length:73         
##  1st Qu.:390000           Class :character  
##  Median :462378           Mode  :character  
##  Mean   :485277                             
##  3rd Qu.:602866                             
##  Max.   :823375
#select all possible correlated variables for daily test positive rate
date = rev(us2$date)
hospitalized=rev(us2$hospitalizedCurrently)
inIcu=rev(us2$inIcuCurrently)
onVent=rev(us2$onVentilatorCurrently)
recovered=rev(us2$recovered)
deathIncrease=rev(us2$deathIncrease)
negativeIncrease=rev(us2$negativeIncrease)
hospitalizedIncrease=rev(us2$hospitalizedIncrease)
pending=rev(us2$pending)

us3 = data.frame (hospitalized,inIcu,onVent,deathIncrease,negativeIncrease,hospitalizedIncrease)       
us3$posrate <- rev((us2$positiveIncrease/us2$totalTestResultsIncrease)*100)

library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
ggpairs(us3)

#us2$hospitalizedCurrently  0.722
#us2$deathIncrease    0.24
#us2$inIcuCurrently   0.205
# we'll move forward for multivariant analysis factor these 3 variables
hist(us3$posrate, plot=T)

hist(us2$hospitalizedCurrently)

hist(us2$deathIncrease)

hist(us2$inIcuCurrently)

# clean dataset ready for modeling analaysis
us4 = data.frame(posrate=us3$posrate, hospitalized=us3$hospitalized,deathIncrease=us3$deathIncrease,inIcu=us3$inIcu)
summary(us4)
##     posrate        hospitalized   deathIncrease        inIcu      
##  Min.   : 3.803   Min.   :27770   Min.   : 209.0   Min.   : 5163  
##  1st Qu.: 5.014   1st Qu.:31468   1st Qu.: 640.0   1st Qu.: 5628  
##  Median : 5.918   Median :37259   Median : 823.0   Median : 6402  
##  Mean   : 6.446   Mean   :38138   Mean   : 917.2   Mean   : 7496  
##  3rd Qu.: 8.094   3rd Qu.:43004   3rd Qu.:1024.0   3rd Qu.: 9048  
##  Max.   :10.234   Max.   :56147   Max.   :2740.0   Max.   :12696
length(us4$posrate)#73
## [1] 73
is.na.data.frame(us4)# no missing value
##       posrate hospitalized deathIncrease inIcu
##  [1,]   FALSE        FALSE         FALSE FALSE
##  [2,]   FALSE        FALSE         FALSE FALSE
##  [3,]   FALSE        FALSE         FALSE FALSE
##  [4,]   FALSE        FALSE         FALSE FALSE
##  [5,]   FALSE        FALSE         FALSE FALSE
##  [6,]   FALSE        FALSE         FALSE FALSE
##  [7,]   FALSE        FALSE         FALSE FALSE
##  [8,]   FALSE        FALSE         FALSE FALSE
##  [9,]   FALSE        FALSE         FALSE FALSE
## [10,]   FALSE        FALSE         FALSE FALSE
## [11,]   FALSE        FALSE         FALSE FALSE
## [12,]   FALSE        FALSE         FALSE FALSE
## [13,]   FALSE        FALSE         FALSE FALSE
## [14,]   FALSE        FALSE         FALSE FALSE
## [15,]   FALSE        FALSE         FALSE FALSE
## [16,]   FALSE        FALSE         FALSE FALSE
## [17,]   FALSE        FALSE         FALSE FALSE
## [18,]   FALSE        FALSE         FALSE FALSE
## [19,]   FALSE        FALSE         FALSE FALSE
## [20,]   FALSE        FALSE         FALSE FALSE
## [21,]   FALSE        FALSE         FALSE FALSE
## [22,]   FALSE        FALSE         FALSE FALSE
## [23,]   FALSE        FALSE         FALSE FALSE
## [24,]   FALSE        FALSE         FALSE FALSE
## [25,]   FALSE        FALSE         FALSE FALSE
## [26,]   FALSE        FALSE         FALSE FALSE
## [27,]   FALSE        FALSE         FALSE FALSE
## [28,]   FALSE        FALSE         FALSE FALSE
## [29,]   FALSE        FALSE         FALSE FALSE
## [30,]   FALSE        FALSE         FALSE FALSE
## [31,]   FALSE        FALSE         FALSE FALSE
## [32,]   FALSE        FALSE         FALSE FALSE
## [33,]   FALSE        FALSE         FALSE FALSE
## [34,]   FALSE        FALSE         FALSE FALSE
## [35,]   FALSE        FALSE         FALSE FALSE
## [36,]   FALSE        FALSE         FALSE FALSE
## [37,]   FALSE        FALSE         FALSE FALSE
## [38,]   FALSE        FALSE         FALSE FALSE
## [39,]   FALSE        FALSE         FALSE FALSE
## [40,]   FALSE        FALSE         FALSE FALSE
## [41,]   FALSE        FALSE         FALSE FALSE
## [42,]   FALSE        FALSE         FALSE FALSE
## [43,]   FALSE        FALSE         FALSE FALSE
## [44,]   FALSE        FALSE         FALSE FALSE
## [45,]   FALSE        FALSE         FALSE FALSE
## [46,]   FALSE        FALSE         FALSE FALSE
## [47,]   FALSE        FALSE         FALSE FALSE
## [48,]   FALSE        FALSE         FALSE FALSE
## [49,]   FALSE        FALSE         FALSE FALSE
## [50,]   FALSE        FALSE         FALSE FALSE
## [51,]   FALSE        FALSE         FALSE FALSE
## [52,]   FALSE        FALSE         FALSE FALSE
## [53,]   FALSE        FALSE         FALSE FALSE
## [54,]   FALSE        FALSE         FALSE FALSE
## [55,]   FALSE        FALSE         FALSE FALSE
## [56,]   FALSE        FALSE         FALSE FALSE
## [57,]   FALSE        FALSE         FALSE FALSE
## [58,]   FALSE        FALSE         FALSE FALSE
## [59,]   FALSE        FALSE         FALSE FALSE
## [60,]   FALSE        FALSE         FALSE FALSE
## [61,]   FALSE        FALSE         FALSE FALSE
## [62,]   FALSE        FALSE         FALSE FALSE
## [63,]   FALSE        FALSE         FALSE FALSE
## [64,]   FALSE        FALSE         FALSE FALSE
## [65,]   FALSE        FALSE         FALSE FALSE
## [66,]   FALSE        FALSE         FALSE FALSE
## [67,]   FALSE        FALSE         FALSE FALSE
## [68,]   FALSE        FALSE         FALSE FALSE
## [69,]   FALSE        FALSE         FALSE FALSE
## [70,]   FALSE        FALSE         FALSE FALSE
## [71,]   FALSE        FALSE         FALSE FALSE
## [72,]   FALSE        FALSE         FALSE FALSE
## [73,]   FALSE        FALSE         FALSE FALSE
#Create a line chart for daily test positive and death rate
# data used are from March 3rd to July 15th
# get the range for the x and y axis
xt=c(1:length(us1$date))
xrange <- range(xt)
# get the test positive rate
dposrate <- us1$positiveIncrease/(us1$positiveIncrease+us1$negativeIncrease)
dposrater<-rev(dposrate)
yrange <- range(dposrater)
# get the death to positive rate
ddrate <- us1$deathIncrease/us1$positiveIncrease
ddrater<-rev(ddrate)

# plot the graph
plot(xt, dposrater, type="o", lwd=0.5,
    lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 04 to July 15)", ylab="Daily Test Positive / Death Rate")
lines(xt, ddrater, type="o", lwd=0.5,
    lty=2, col='red', pch=16,cex=0.4)

# add a title and subtitle
title("Daily US Test Positive & Death Rate")

# add a legend
legend(100, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)

#Create a line chart for daily test positive and death count (March 03, to July 15)
# get the range for the x and y axis
# get the test positive count
dpos <- us1$positiveIncrease
dposr<-rev(dpos)
yrange <- range(dposr)
# get the death count
dd <- us1$deathIncrease
ddr<-rev(dd)

# plot the graph
plot(xt, dposr, type="o", lwd=0.5,
    lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 04 to July 15)", ylab="Daily Test Positive / Death Count")
lines(xt, ddr, type="o", lwd=0.5,
    lty=2, col='red', pch=16,cex=0.4)

# add a title and subtitle
title("Daily US Test Positive & Death Count")

# add a legend
legend(5, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
plotts.sample.wge(dposrater)

## $autplt
##  [1] 1.0000000 0.8869272 0.8680812 0.8503183 0.7903073 0.7813791 0.7607400
##  [8] 0.7480428 0.7296608 0.7389640 0.7264490 0.6756612 0.6958569 0.6656387
## [15] 0.6640534 0.6502526 0.6021160 0.5909614 0.5680928 0.5558950 0.5188207
## [22] 0.5025282 0.4917558 0.4733021 0.4587867 0.4347135
## 
## $freq
##  [1] 0.007462687 0.014925373 0.022388060 0.029850746 0.037313433 0.044776119
##  [7] 0.052238806 0.059701493 0.067164179 0.074626866 0.082089552 0.089552239
## [13] 0.097014925 0.104477612 0.111940299 0.119402985 0.126865672 0.134328358
## [19] 0.141791045 0.149253731 0.156716418 0.164179104 0.171641791 0.179104478
## [25] 0.186567164 0.194029851 0.201492537 0.208955224 0.216417910 0.223880597
## [31] 0.231343284 0.238805970 0.246268657 0.253731343 0.261194030 0.268656716
## [37] 0.276119403 0.283582090 0.291044776 0.298507463 0.305970149 0.313432836
## [43] 0.320895522 0.328358209 0.335820896 0.343283582 0.350746269 0.358208955
## [49] 0.365671642 0.373134328 0.380597015 0.388059701 0.395522388 0.402985075
## [55] 0.410447761 0.417910448 0.425373134 0.432835821 0.440298507 0.447761194
## [61] 0.455223881 0.462686567 0.470149254 0.477611940 0.485074627 0.492537313
## [67] 0.500000000
## 
## $db
##  [1]  17.1849025  -1.9573945  -1.0222534  -0.2746971   0.1276307  -0.8095633
##  [7]  -0.1818880  -4.8735910  -3.0803263  -0.3094095  -1.2016340  -2.6269452
## [13]  -5.4361635 -12.8869678  -4.7396201  -2.7708140  -5.4613032 -15.1177175
## [19]  -8.2619075  -7.8753721 -17.5877733 -10.9327719 -18.6492801 -25.4145610
## [25] -22.8167144 -27.0042665 -11.5344329  -6.5467609  -3.6915075 -16.8663437
## [31] -18.5196997 -11.0480780 -16.1299132 -21.8076343 -20.1105694 -12.1691103
## [37]  -8.4572314 -12.6006590 -16.1507152 -11.2128581 -10.9418647  -6.9043297
## [43]  -3.7563111  -9.8600573  -6.8303428  -6.2096606  -6.3149538 -21.4387545
## [49] -28.4882072 -12.2443745  -7.3894809 -11.0501004 -11.7172251  -5.6489935
## [55]  -5.6240358  -8.0648930  -7.2319620  -9.2171920 -27.4439154 -13.6211039
## [61] -14.5765678 -25.7861235 -21.0316946  -9.7190535  -7.5058779 -21.0261805
## [67] -25.8419347
## 
## $dbz
##  [1]  11.2044994  10.6420938   9.6987222   8.3691728   6.6577431   4.6018827
##  [7]   2.3240087   0.1043517  -1.6562611  -2.7603493  -3.4161863  -3.9208749
## [13]  -4.3990158  -4.8457504  -5.2510187  -5.6612031  -6.1561114  -6.7978773
## [19]  -7.6022728  -8.5349977  -9.5158106 -10.4182676 -11.0746386 -11.3331326
## [25] -11.1779644 -10.7728953 -10.3481431 -10.0803792 -10.0618176 -10.3151049
## [31] -10.8067007 -11.4461370 -12.0799893 -12.5105025 -12.5734060 -12.2358892
## [37] -11.6013733 -10.8201247 -10.0171717  -9.2768827  -8.6534957  -8.1828926
## [43]  -7.8892288  -7.7866508  -7.8766836  -8.1409253  -8.5288240  -8.9450569
## [49]  -9.2546628  -9.3337224  -9.1515757  -8.7982640  -8.4211404  -8.1502092
## [55]  -8.0705121  -8.2267796  -8.6328537  -9.2732931 -10.0939742 -10.9851736
## [61] -11.7778687 -12.2967709 -12.4713181 -12.3858987 -12.1979826 -12.0379928
## [67] -11.9775222
# ACF slow dumping, a tiny cyclic behavior. Parzen window show strong frequency at 0, weak frequency at 0.3 and 0.4. 
# try ARMA model directly
is.na(dposrater)
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE
aic5.wge(dposrater,p=0:10, q=0:5)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic
##       p    q        aic
## 63   10    2  -7.408574
## 51    8    2  -7.389009
## 64   10    3  -7.373691
## 62   10    1  -7.326196
## 44    7    1  -7.303328
# AIC lingers around -7.x, doesn't change much. aic5.wge always prefer to pick high order p&q
# try to take first diff
dposrater_d1<-artrans.wge(dposrater,phi.tr = 1)

acf(dposrater_d1)
# ACF looks much better, still 3 lags are above the limit line. 
aic5.wge(dposrater_d1, p=0:10, q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic
##       p    q        aic
## 52   10    1  -7.473906
## 38    7    2  -7.472336
## 51   10    0  -7.466801
## 53   10    2  -7.461169
## 49    9    3  -7.385656
# still pick 10, 1.AIC lingers around -7.4 is there any other thing we can do here?
aic5.wge(dposrater_d1, p=0:10, q=0:4, type="bic")
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  bic
##       p    q        bic
## 2     0    1  -7.258225
## 38    7    2  -7.255016
## 51   10    0  -7.227750
## 7     1    1  -7.223371
## 4     0    3  -7.213250
# BIC pick 0, 1, BIC lingers around -7.2
# lets first test out the residual
estd1aic <- est.arma.wge(dposrater_d1, p=10, q=1, factor = T)
## 
## Coefficients of Original polynomial:  
## -1.0004 -0.5116 -0.2541 -0.4236 -0.2947 -0.0075 0.0816 0.0384 0.2274 0.3758 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.9251B+0.9084B^2   -0.5092+-0.9174i      0.9531       0.3306
## 1+1.6248B+0.9078B^2   -0.8949+-0.5484i      0.9528       0.4125
## 1+0.9132B             -1.0951               0.9132       0.5000
## 1-1.2801B+0.8217B^2    0.7789+-0.7812i      0.9065       0.1252
## 1-0.3765B+0.7533B^2    0.2499+-1.1247i      0.8680       0.2152
## 1-0.8061B              1.2405               0.8061       0.0000
##   
## 
fore.aruma.wge(dposrater,d=1, phi = estd1aic$phi,theta = estd1aic$theta, n.ahead = 40, lastn= T, limits=F )

## $f
##  [1] 0.04553089 0.04284428 0.04598659 0.04573289 0.04300856 0.04402237
##  [7] 0.04244493 0.04218625 0.04479960 0.04345762 0.04318432 0.04351993
## [13] 0.04374729 0.04241314 0.04316301 0.04321173 0.04226448 0.04391539
## [19] 0.04350377 0.04254496 0.04354990 0.04290702 0.04278937 0.04308944
## [25] 0.04326403 0.04278179 0.04325579 0.04346832 0.04258768 0.04322394
## [31] 0.04317545 0.04265355 0.04333698 0.04309370 0.04295115 0.04313800
## [37] 0.04319507 0.04288424 0.04301477 0.04325720
## 
## $ll
##  [1]  0.0071202237  0.0004555798 -0.0003705090 -0.0042336397 -0.0074844352
##  [6] -0.0089012519 -0.0136234585 -0.0165334855 -0.0171122663 -0.0233294827
## [11] -0.0281307554 -0.0289498173 -0.0324613464 -0.0350846677 -0.0362392294
## [16] -0.0401212914 -0.0430718433 -0.0441232729 -0.0473160499 -0.0504298371
## [21] -0.0512563375 -0.0538500256 -0.0562832278 -0.0574269216 -0.0601706854
## [26] -0.0626424013 -0.0638001606 -0.0660241088 -0.0684348504 -0.0695360872
## [31] -0.0715020481 -0.0737697203 -0.0748493632 -0.0768944768 -0.0789867677
## [36] -0.0801692006 -0.0819707425 -0.0839189934 -0.0851676787 -0.0868015242
## 
## $ul
##  [1] 0.08394155 0.08523298 0.09234368 0.09569942 0.09350155 0.09694600
##  [7] 0.09851331 0.10090598 0.10671147 0.11024473 0.11449940 0.11598967
## [13] 0.11995593 0.11991095 0.12256525 0.12654476 0.12760079 0.13195405
## [19] 0.13432359 0.13551977 0.13835615 0.13966406 0.14186198 0.14360580
## [25] 0.14669875 0.14820599 0.15031174 0.15296074 0.15361020 0.15598396
## [31] 0.15785295 0.15907683 0.16152332 0.16308188 0.16488906 0.16644520
## [37] 0.16836087 0.16968746 0.17119722 0.17331592
## 
## $resid
##   [1]  0.0000000000  0.0000000000  0.0000000000  0.0000000000  0.0000000000
##   [6]  0.0000000000  0.0000000000  0.0000000000  0.0000000000  0.0000000000
##  [11]  0.0000000000 -0.0039999620 -0.0739936114  0.0363590434  0.0213066453
##  [16]  0.0472709714 -0.0093265449  0.0281772730  0.0393115959  0.0057687847
##  [21] -0.0496757505  0.0145343798  0.0068346192  0.0017334952  0.0240223114
##  [26]  0.0064135655  0.0092724695  0.0215753337  0.0196701116  0.0417205695
##  [31]  0.0109668264 -0.0932690919  0.0223541072 -0.0106624045 -0.0068957780
##  [36] -0.0222751525  0.0183258408 -0.0003240235  0.0198974353 -0.0157488966
##  [41] -0.0022873029 -0.0151169221  0.0142354288 -0.0002772878 -0.0159155653
##  [46] -0.0074287305 -0.0044220713 -0.0176438040 -0.0061500383 -0.0818353354
##  [51]  0.0292037538 -0.0109800898 -0.0267642203 -0.0166364390 -0.0165518568
##  [56]  0.0148986276 -0.0150003119  0.0071164033  0.0048502716  0.0073928038
##  [61] -0.0352980200  0.0107165961 -0.0319613559  0.0136740260 -0.0059624661
##  [66] -0.0089577824 -0.0121959316 -0.0054712390 -0.0316942692  0.0022635168
##  [71]  0.0018637173  0.0075889624 -0.0120564437 -0.0008900479 -0.0058186418
##  [76] -0.0027530270 -0.0052448308  0.0017408074  0.0023080118 -0.0050652307
##  [81]  0.0025575981 -0.0082652545 -0.0045770039  0.0076808857  0.0130910996
##  [86] -0.0085683742 -0.0058034786  0.0038407670  0.0042357781 -0.0065341917
##  [91] -0.0053809293 -0.0025912040 -0.0004371642 -0.0071759615 -0.0004346580
##  [96] -0.0005459094 -0.0025805336 -0.0031556082  0.0086458854  0.0032903552
## [101] -0.0077221452  0.0078777877 -0.0006608687 -0.0016496700  0.0040121631
## [106]  0.0047182790  0.0053271036  0.0017535559  0.0034037426  0.0028891269
## [111]  0.0048646978  0.0073911792  0.0192005983 -0.0106556817  0.0082998347
## [116]  0.0065950189  0.0017933527 -0.0174259314  0.0060903908  0.0178127323
## [121]  0.0036572659 -0.0085257319  0.0057117166 -0.0067023193  0.0140513222
## [126]  0.0009403316  0.0186788346 -0.0088552550 -0.0085178429  0.0144148195
## [131] -0.0019128732 -0.0105996177 -0.0025830497  0.0066987745
## 
## $wnv
## [1] 0.0003840533
## 
## $se
##  [1] 0.01959728 0.02162689 0.02365158 0.02549313 0.02576173 0.02700185
##  [7] 0.02860632 0.02995905 0.03158769 0.03407505 0.03638524 0.03697436
## [13] 0.03888196 0.03953970 0.04051135 0.04251685 0.04353894 0.04491768
## [19] 0.04633664 0.04743612 0.04837053 0.04936584 0.05054725 0.05128386
## [25] 0.05277281 0.05378785 0.05462038 0.05586348 0.05664415 0.05753063
## [31] 0.05850893 0.05939963 0.06029915 0.06121846 0.06221322 0.06291184
## [37] 0.06386011 0.06469553 0.06539921 0.06635649
## 
## $psi
##  [1] 0.4667524 0.4885730 0.4854386 0.1893336 0.4127651 0.4819912 0.4541823
##  [8] 0.5108806 0.6521292 0.6510290 0.3354540 0.6138238 0.3664788 0.4500308
## [15] 0.6584592 0.4785622 0.5635229 0.5806522 0.5181269 0.4828035 0.5032814
## [22] 0.5543900 0.4419400 0.6351560 0.5306942 0.4847692 0.5980073 0.4782196
## [29] 0.5133619 0.5436806 0.5229295 0.5294874 0.5393287 0.5654281 0.4770849
## [36] 0.5594762 0.5288127 0.4882284 0.5730714 0.5154248
## 
## $ptot
## [1] 11
## 
## $phitot
##  [1] -0.0003645301  0.4887431558  0.2574946290 -0.1694624292  0.1288714317
##  [6]  0.2872520914  0.0890943500 -0.0432114691  0.1889720066  0.1483861169
## [11] -0.3757753530
# as we have 1-B term, the long term forcast is just repeating the last value in dataset. 
# try forecast without the 1-B
estaic <- est.arma.wge(dposrater, p=10, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## -0.8628 0.2800 0.7777 0.1435 -0.0007 0.2721 0.2637 -0.0886 -0.0682 0.2012 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9884B              1.0118               0.9884       0.0000
## 1+1.6766B+0.9598B^2   -0.8735+-0.5282i      0.9797       0.4134
## 1+0.9536B+0.8564B^2   -0.5567+-0.9261i      0.9254       0.3361
## 1+0.8930B             -1.1198               0.8930       0.5000
## 1-0.5509B+0.5853B^2    0.4707+-1.2195i      0.7650       0.1914
## 1-1.1211B+0.4738B^2    1.1831+-0.8432i      0.6883       0.0986
##   
## 
fore.aruma.wge(dposrater, phi = estaic$phi,theta = estaic$theta, n.ahead = 80, lastn= F, limits=F )

## $f
##  [1] 0.09046394 0.08682474 0.09465579 0.08150466 0.09205723 0.09117187
##  [7] 0.08483302 0.09554320 0.08626470 0.09138996 0.09163261 0.08962519
## [13] 0.09365068 0.08850201 0.09558623 0.08912740 0.09349305 0.09453649
## [19] 0.08918413 0.09741067 0.09065554 0.09466005 0.09496027 0.09224209
## [25] 0.09710799 0.09209900 0.09732234 0.09396112 0.09522708 0.09763389
## [31] 0.09299941 0.09911673 0.09451177 0.09696615 0.09796546 0.09501309
## [37] 0.09962530 0.09541843 0.09904662 0.09761238 0.09723579 0.10016115
## [43] 0.09623402 0.10069907 0.09784052 0.09895392 0.10051224 0.09762997
## [49] 0.10156960 0.09839439 0.10069081 0.10047927 0.09927927 0.10220408
## [55] 0.09903463 0.10217107 0.10062851 0.10077426 0.10262217 0.10002406
## [61] 0.10316455 0.10098428 0.10223669 0.10277329 0.10124437 0.10387629
## [67] 0.10146132 0.10353747 0.10293346 0.10246925 0.10436182 0.10217022
## [73] 0.10452970 0.10319300 0.10368460 0.10462987 0.10307120 0.10527228
## [79] 0.10354989 0.10480568
## 
## $ll
##  [1]  0.0450603030  0.0360765553  0.0394586038  0.0217355573  0.0313823395
##  [6]  0.0280196721  0.0182493798  0.0270075314  0.0161361932  0.0191113261
## [11]  0.0168313730  0.0140593268  0.0143314561  0.0082784603  0.0140011780
## [16]  0.0050884636  0.0087067922  0.0078804809  0.0012435823  0.0082674355
## [21]  0.0002025128  0.0031437181  0.0019023519 -0.0014409595  0.0017447383
## [26] -0.0040639257  0.0003033379 -0.0045164234 -0.0037693899 -0.0026162225
## [31] -0.0080280685 -0.0027031338 -0.0082847340 -0.0064375990 -0.0064859741
## [36] -0.0099441726 -0.0062727777 -0.0111413595 -0.0080659490 -0.0104557277
## [41] -0.0112293451 -0.0091342334 -0.0136153056 -0.0096792392 -0.0132606529
## [46] -0.0125462294 -0.0117138267 -0.0149976660 -0.0116373480 -0.0153409002
## [51] -0.0134182806 -0.0142856993 -0.0157982621 -0.0134247817 -0.0170155311
## [56] -0.0142398910 -0.0163146977 -0.0164542463 -0.0151096320 -0.0180267396
## [61] -0.0152634367 -0.0178556909 -0.0168670228 -0.0167890274 -0.0185669142
## [66] -0.0163053378 -0.0190473233 -0.0172233425 -0.0182170092 -0.0188958655
## [71] -0.0173522271 -0.0197974001 -0.0176929723 -0.0193451964 -0.0190466099
## [76] -0.0184241486 -0.0201819722 -0.0182333794 -0.0202097677 -0.0191347795
## 
## $ul
##  [1] 0.1358676 0.1375729 0.1498530 0.1412738 0.1527321 0.1543241 0.1514167
##  [8] 0.1640789 0.1563932 0.1636686 0.1664338 0.1651911 0.1729699 0.1687256
## [15] 0.1771713 0.1731663 0.1782793 0.1811925 0.1771247 0.1865539 0.1811086
## [22] 0.1861764 0.1880182 0.1859251 0.1924712 0.1882619 0.1943413 0.1924387
## [29] 0.1942236 0.1978840 0.1940269 0.2009366 0.1973083 0.2003699 0.2024169
## [36] 0.1999704 0.2055234 0.2019782 0.2061592 0.2056805 0.2057009 0.2094565
## [43] 0.2060833 0.2110774 0.2089417 0.2104541 0.2127383 0.2102576 0.2147766
## [50] 0.2121297 0.2147999 0.2152442 0.2143568 0.2178329 0.2150848 0.2185820
## [57] 0.2175717 0.2180028 0.2203540 0.2180749 0.2215925 0.2198242 0.2213404
## [64] 0.2223356 0.2210557 0.2240579 0.2219700 0.2242983 0.2240839 0.2238344
## [71] 0.2260759 0.2241378 0.2267524 0.2257312 0.2264158 0.2276839 0.2263244
## [78] 0.2287779 0.2273095 0.2287461
## 
## $resid
##   [1]  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##   [6]  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  [11] -4.043851e-02  8.039758e-02 -1.178948e-01  6.774276e-02 -4.961026e-03
##  [16] -1.405217e-03  1.261723e-03  7.705120e-03  5.090656e-02  1.557385e-02
##  [21] -4.904113e-02  1.034379e-02  4.621164e-03  1.752960e-02  2.479506e-02
##  [26]  5.099403e-03  2.599601e-02  1.707985e-02  2.415617e-02  3.813433e-02
##  [31] -1.169165e-03 -7.358963e-02  3.045081e-02 -4.703371e-03  6.294692e-03
##  [36] -1.591581e-02  2.501523e-02  6.789122e-03  2.316101e-02 -2.539909e-02
##  [41] -1.304472e-02 -1.250431e-02  2.079005e-02  9.690281e-03 -1.748688e-02
##  [46]  2.721836e-03 -2.897509e-03 -1.898228e-02 -3.483165e-03 -9.292373e-02
##  [51]  3.826990e-02 -7.866898e-03 -2.098428e-02 -1.517631e-02 -2.628232e-02
##  [56]  1.584698e-02 -2.586034e-02  1.393126e-03 -3.667954e-03  3.529978e-03
##  [61] -2.519869e-02  4.111394e-03 -4.114971e-02  1.598877e-02 -8.674887e-03
##  [66] -1.043706e-02 -6.741850e-03 -1.375786e-02 -3.100093e-02 -3.354298e-03
##  [71] -3.655560e-03  4.943688e-03 -1.335403e-02  3.733053e-05 -1.251938e-02
##  [76] -7.850640e-03 -1.083139e-02 -5.134337e-03  1.773355e-03 -3.126970e-03
##  [81]  1.573347e-03 -1.257067e-02 -7.929839e-03  2.857129e-03  8.297269e-03
##  [86] -8.866218e-03 -8.367533e-03  3.506115e-03  2.273695e-03 -8.890312e-03
##  [91] -8.980982e-03 -4.752187e-03 -8.005004e-04 -6.603473e-03 -2.808370e-03
##  [96] -4.885122e-03 -3.867527e-03 -4.188559e-03  4.154389e-03 -4.999517e-04
## [101] -1.033769e-02  5.803065e-03 -3.228714e-03 -3.440704e-03  1.327057e-03
## [106]  2.176459e-03  4.377936e-03  4.616404e-04  1.862384e-03 -2.761838e-04
## [111]  2.919216e-03  6.696175e-03  1.746019e-02 -1.199935e-02  9.930755e-03
## [116]  4.598299e-03  7.677183e-04 -1.641156e-02  4.589696e-03  1.908465e-02
## [121]  5.195409e-03 -6.655687e-03  3.733049e-03 -9.313651e-03  1.467113e-02
## [126] -1.439113e-03  1.655986e-02 -7.256988e-03 -4.756600e-03  1.496201e-02
## [131] -5.703629e-03 -1.131819e-02 -2.416954e-03  7.713896e-03
## 
## $wnv
## [1] 0.0005366229
## 
## $se
##  [1] 0.02316512 0.02589193 0.02816183 0.03049444 0.03095657 0.03222051
##  [7] 0.03397124 0.03496718 0.03577985 0.03687685 0.03816390 0.03855401
## [13] 0.04046899 0.04093038 0.04162503 0.04287701 0.04325829 0.04421225
## [19] 0.04486763 0.04548124 0.04614951 0.04669201 0.04747853 0.04779747
## [25] 0.04865472 0.04906272 0.04949949 0.05024364 0.05050840 0.05114802
## [31] 0.05154463 0.05194891 0.05244719 0.05275702 0.05329155 0.05354963
## [37] 0.05402963 0.05436724 0.05464927 0.05513679 0.05533935 0.05576295
## [43] 0.05604557 0.05631546 0.05668427 0.05688783 0.05725820 0.05746308
## [49] 0.05775865 0.05802821 0.05821893 0.05855355 0.05871303 0.05899432
## [55] 0.05920927 0.05939335 0.05966490 0.05981046 0.06006724 0.06023000
## [61] 0.06042244 0.06063264 0.06076720 0.06100118 0.06112821 0.06131716
## [67] 0.06148400 0.06161266 0.06181147 0.06192098 0.06209900 0.06222838
## [73] 0.06235851 0.06251949 0.06261796 0.06278266 0.06288427 0.06301309
## [79] 0.06314268 0.06323493
## 
## $psi
##  [1] 0.4992795 0.4781693 0.5049441 0.2300463 0.3857512 0.4647053 0.3576937
##  [8] 0.3273234 0.3853964 0.4242387 0.2361628 0.5310360 0.2645523 0.3269031
## [15] 0.4440151 0.2473899 0.3943306 0.3298384 0.3214210 0.3378002 0.3063638
## [22] 0.3715173 0.2379648 0.3925316 0.2725702 0.2832361 0.3719104 0.2229559
## [29] 0.3480900 0.2754991 0.2792302 0.3113476 0.2464553 0.3250166 0.2266771
## [36] 0.3102086 0.2611431 0.2393638 0.3158174 0.2042127 0.2961446 0.2426639
## [43] 0.2377196 0.2786791 0.2075629 0.2806797 0.2092867 0.2519187 0.2411690
## [50] 0.2032610 0.2698464 0.1866796 0.2483961 0.2175945 0.2017055 0.2454550
## [57] 0.1800241 0.2395054 0.1910126 0.2080086 0.2177554 0.1744782 0.2304214
## [64] 0.1700290 0.2076393 0.1953973 0.1717944 0.2138359 0.1589041 0.2028412
## [71] 0.1731304 0.1738147 0.1935511 0.1515379 0.1961832 0.1542579 0.1738450
## [78] 0.1745446 0.1473932 0.1847774
## 
## $ptot
## [1] 10
## 
## $phitot
##  [1] -0.8628300742  0.2799978066  0.7777257868  0.1435384257 -0.0006915594
##  [6]  0.2721321259  0.2637148325 -0.0886366867 -0.0682142288  0.2011735763
# now it is very slowly trending towards dataset mean because it is a stationary model
# what about BIC pick simple model?
estd1bic <- est.arma.wge(dposrater_d1, p=0, q=1, factor = T)
fore.aruma.wge(dposrater,d=1, phi = estd1bic$phi,theta = estd1bic$theta, n.ahead = 40, lastn= F, limits=F )

## $f
##  [1] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
##  [7] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [13] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [19] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [25] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [31] 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329 0.08489329
## [37] 0.08489329 0.08489329 0.08489329 0.08489329
## 
## $ll
##  [1]  0.033524177  0.029836716  0.026381181  0.023118639  0.020019967
##  [6]  0.017062702  0.014229090  0.011504805  0.008878092  0.006339163
## [11]  0.003879764  0.001492858 -0.000827609 -0.003086896 -0.005289600
## [16] -0.007439771 -0.009540998 -0.011596478 -0.013609075 -0.015581366
## [21] -0.017515679 -0.019414128 -0.021278637 -0.023110963 -0.024912717
## [26] -0.026685381 -0.028430319 -0.030148793 -0.031841972 -0.033510941
## [31] -0.035156710 -0.036780220 -0.038382351 -0.039963925 -0.041525714
## [36] -0.043068444 -0.044592793 -0.046099406 -0.047588886 -0.049061805
## 
## $ul
##  [1] 0.1362624 0.1399499 0.1434054 0.1466679 0.1497666 0.1527239 0.1555575
##  [8] 0.1582818 0.1609085 0.1634474 0.1659068 0.1682937 0.1706142 0.1728735
## [15] 0.1750762 0.1772263 0.1793276 0.1813831 0.1833956 0.1853679 0.1873023
## [22] 0.1892007 0.1910652 0.1928975 0.1946993 0.1964720 0.1982169 0.1999354
## [29] 0.2016285 0.2032975 0.2049433 0.2065668 0.2081689 0.2097505 0.2113123
## [36] 0.2128550 0.2143794 0.2158860 0.2173755 0.2188484
## 
## $resid
##   [1]  0.000000e+00  7.766441e-02 -2.087104e-02  8.347763e-02  8.108788e-02
##   [6] -4.868125e-02 -5.979305e-02 -7.581236e-02 -5.616936e-02 -5.117963e-02
##  [11]  5.527362e-02 -5.122946e-06 -7.698556e-02  7.376985e-02 -3.676559e-02
##  [16]  1.723690e-02  1.486201e-02 -9.605920e-03  4.583775e-02  2.131918e-02
##  [21] -2.989453e-02 -1.961301e-02  8.828622e-03  1.752415e-02  9.813993e-03
##  [26]  4.581003e-02 -1.135203e-02  2.801457e-02  3.486975e-02  1.945107e-02
##  [31]  1.681491e-02 -8.497345e-02  1.624352e-02 -1.318626e-02  8.117023e-04
##  [36]  8.247929e-03 -7.473808e-04  1.520034e-02  1.556065e-02 -1.309813e-02
##  [41] -2.117926e-02 -3.143723e-02  3.299033e-02 -1.212708e-02  5.194006e-03
##  [46] -6.495391e-03 -1.729115e-02 -1.148272e-02 -1.234142e-02 -9.050180e-02
##  [51]  1.988479e-02 -7.754294e-03 -1.965136e-02 -9.351329e-03 -2.562852e-02
##  [56] -6.593893e-03 -1.230225e-02  6.011534e-03 -1.165136e-02 -1.008836e-03
##  [61] -9.792586e-03 -1.765100e-02 -2.546018e-02  4.003576e-03 -8.745599e-03
##  [66] -3.825964e-03 -9.951711e-03 -1.045165e-02 -3.944013e-02  1.495623e-03
##  [71] -6.102772e-03  3.173990e-03 -2.440380e-03 -2.247199e-03 -1.524607e-02
##  [76] -4.533009e-03 -1.000656e-02 -5.745793e-03  1.519587e-03  3.547392e-03
##  [81] -2.485419e-03 -4.186732e-03 -1.078829e-02  3.244309e-03  1.048790e-02
##  [86] -4.386930e-03 -6.437752e-03  3.579397e-03  1.007811e-03 -4.489523e-03
##  [91] -4.539783e-03 -6.997813e-03 -2.614985e-03 -6.868797e-04 -1.450890e-03
##  [96] -3.892101e-03 -1.547749e-03 -2.994660e-03  5.838588e-03  3.284515e-03
## [101] -8.225706e-03  7.060214e-03 -1.542901e-03 -2.672595e-03  6.422571e-03
## [106]  2.349912e-03  6.003221e-03  4.639297e-03  4.742350e-03 -2.910855e-04
## [111]  5.048694e-03  1.076669e-02  1.620161e-02 -4.284476e-03  9.717002e-03
## [116]  4.800781e-03  1.674671e-03 -1.258471e-02  4.489182e-03  1.650284e-02
## [121]  9.279507e-03  1.277572e-03  3.474781e-03 -1.412238e-02  1.769164e-02
## [126]  1.399145e-03  1.827950e-02 -3.934242e-03 -5.481609e-03  1.484647e-02
## [131] -6.194814e-03 -6.680009e-03 -2.364427e-03  1.908099e-03
## 
## $wnv
## [1] 0.0006868975
## 
## $se
##  [1] 0.02620873 0.02809009 0.02985312 0.03151768 0.03309863 0.03460744
##  [7] 0.03605316 0.03744310 0.03878326 0.04007863 0.04133343 0.04255124
## [13] 0.04373515 0.04488785 0.04601168 0.04710870 0.04818076 0.04922947
## [19] 0.05025631 0.05126258 0.05224947 0.05321807 0.05416935 0.05510421
## [25] 0.05602347 0.05692789 0.05781817 0.05869494 0.05955881 0.06041032
## [31] 0.06125000 0.06207832 0.06289573 0.06370266 0.06449949 0.06528660
## [37] 0.06606433 0.06683301 0.06759294 0.06834443
## 
## $psi
##  [1] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
##  [8] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [15] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [22] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [29] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## [36] 0.3856425 0.3856425 0.3856425 0.3856425 0.3856425
## 
## $ptot
## [1] 1
## 
## $phitot
## [1] 1
#very very straight line, not good!
# calculating ASE
f_dposrater_d1<-fore.aruma.wge(dposrater,d=1, phi = estd1bic$phi,theta = estd1bic$theta, n.ahead = 20, lastn= T, limits=F )

ase_dposrater_d1_aic = mean((f_dposrater_d1$f - dposrater[length(f_dposrater_d1$f)-20+1:length(f_dposrater_d1$f)])^2)
ase_dposrater_d1_aic
## [1] 0.01633175
# trial run, late decide to run data from May 03
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# try to work on positive count, rather than rate
plotts.sample.wge(dposr)

## $autplt
##  [1]  1.00000000  0.94030333  0.87704100  0.82310353  0.78139908  0.74729768
##  [7]  0.71461761  0.67511182  0.60755356  0.54009064  0.48457571  0.44771154
## [13]  0.41170553  0.37127579  0.32238768  0.25466332  0.19187584  0.14685491
## [19]  0.10759282  0.07496058  0.04743082  0.01547334 -0.02913890 -0.07022287
## [25] -0.09574474 -0.10917801
## 
## $freq
##  [1] 0.007462687 0.014925373 0.022388060 0.029850746 0.037313433 0.044776119
##  [7] 0.052238806 0.059701493 0.067164179 0.074626866 0.082089552 0.089552239
## [13] 0.097014925 0.104477612 0.111940299 0.119402985 0.126865672 0.134328358
## [19] 0.141791045 0.149253731 0.156716418 0.164179104 0.171641791 0.179104478
## [25] 0.186567164 0.194029851 0.201492537 0.208955224 0.216417910 0.223880597
## [31] 0.231343284 0.238805970 0.246268657 0.253731343 0.261194030 0.268656716
## [37] 0.276119403 0.283582090 0.291044776 0.298507463 0.305970149 0.313432836
## [43] 0.320895522 0.328358209 0.335820896 0.343283582 0.350746269 0.358208955
## [49] 0.365671642 0.373134328 0.380597015 0.388059701 0.395522388 0.402985075
## [55] 0.410447761 0.417910448 0.425373134 0.432835821 0.440298507 0.447761194
## [61] 0.455223881 0.462686567 0.470149254 0.477611940 0.485074627 0.492537313
## [67] 0.500000000
## 
## $db
##  [1]   4.9424430  14.1064877  12.1363565   8.0575206   2.2824269   0.6438849
##  [7]   2.2390356   0.1335972  -0.9670253   0.1923986  -2.8709965  -2.5687599
## [13]  -2.7559601  -5.6278100  -5.0698519  -9.6786254  -8.5964012  -6.3423115
## [19]  -3.3068006  -1.6718687  -2.9718973  -4.0934334  -8.1039003  -7.0961801
## [25]  -7.2102604 -11.7152365 -10.0702766 -10.2333089 -13.7776917  -9.9888756
## [31] -10.0537597  -9.9494512  -8.7717003  -9.6351943 -11.8016155 -14.1931335
## [37]  -9.0845318  -5.9426752  -9.5996526 -11.6511747 -19.7512049 -10.7586641
## [43] -14.1772539 -19.4372777 -13.1999421 -11.4926902 -10.1252570 -13.8628534
## [49] -18.0053337 -11.1500199 -15.1965479 -17.1489966 -17.6979597 -11.8316771
## [55] -12.1531021 -11.4287974 -12.2683953 -11.3614425 -10.6478686 -14.5484499
## [61] -12.6329743 -16.3061089 -11.5235513 -19.1052360 -12.6822944 -20.7582976
## [67] -23.4465724
## 
## $dbz
##  [1]  10.83907999  10.40847146   9.68852600   8.67732615   7.37603935
##  [6]   5.79525151   3.96744682   1.96840355  -0.05783602  -1.90328744
## [11]  -3.38061144  -4.43927046  -5.15017307  -5.58206215  -5.75586721
## [16]  -5.70322187  -5.51462375  -5.31511206  -5.21494784  -5.28702279
## [21]  -5.56976984  -6.07657283  -6.80095441  -7.71458905  -8.75880357
## [26]  -9.83516604 -10.81114406 -11.56132640 -12.03055973 -12.25441514
## [31] -12.31134349 -12.26725653 -12.16136809 -12.02201018 -11.88332370
## [36] -11.78833610 -11.78109525 -11.89659336 -12.15330767 -12.54802407
## [41] -13.05148352 -13.60651129 -14.13566281 -14.56620172 -14.86514574
## [46] -15.05531739 -15.19338515 -15.32913346 -15.47637632 -15.60711387
## [51] -15.66805360 -15.61381648 -15.43877115 -15.18317638 -14.91121342
## [56] -14.68355173 -14.54255473 -14.51040219 -14.59297578 -14.78423541
## [61] -15.06901729 -15.42393088 -15.81680534 -16.20587116 -16.54099161
## [66] -16.77004861 -16.85172798
aic5.wge(dposr,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 4 2 
## Error in aic calculation at 4 3 
## Error in aic calculation at 5 0 
## Error in aic calculation at 6 0 
## Error in aic calculation at 6 1 
## Error in aic calculation at 6 2 
## Error in aic calculation at 6 4 
## Error in aic calculation at 7 0 
## Error in aic calculation at 7 2 
## Error in aic calculation at 7 3 
## Error in aic calculation at 7 4 
## Five Smallest Values of  aic
##       p    q        aic
## 43    8    2   15.68195
## 47    9    1   15.68653
## 51   10    0   15.69133
## 42    8    1   15.69172
## 46    9    0   15.69267
# pick 8,2, better AIC 15.68 on count, vs. -7.4 on rate
aic5.wge(dposr,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 4 2 
## Error in aic calculation at 4 3 
## Error in aic calculation at 5 0 
## Error in aic calculation at 6 0 
## Error in aic calculation at 6 1 
## Error in aic calculation at 6 2 
## Error in aic calculation at 6 4 
## Error in aic calculation at 7 0 
## Error in aic calculation at 7 2 
## Error in aic calculation at 7 3 
## Error in aic calculation at 7 4 
## Five Smallest Values of  bic
##       p    q        bic
## 42    8    1   15.90798
## 46    9    0   15.90892
## 43    8    2   15.91983
## 47    9    1   15.92442
## 51   10    0   15.92921
# pick 8, 1, not much different
# estimate model phi and theta
estaic_ct <- est.arma.wge(dposr, p=8, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## 1.8021 -1.1264 0.2207 0.2179 -0.2538 0.5316 -0.2885 -0.1119 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9615B+0.9641B^2    1.0173+-0.0483i      0.9819       0.0076
## 1-1.2266B+0.9629B^2    0.6369+-0.7955i      0.9813       0.1426
## 1+0.3863B+0.6365B^2   -0.3034+-1.2161i      0.7978       0.2889
## 1+0.7458B             -1.3409               0.7458       0.5000
## 1+0.2539B             -3.9389               0.2539       0.5000
##   
## 
f_dposr_ct <- fore.aruma.wge(dposr, phi = estaic_ct$phi,theta = estaic_ct$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_dposr_ct = mean((f_dposr_ct$f - dposr[length(f_dposr_ct$f)-20+1:length(f_dposr_ct$f)])^2)
ase_dposr_ct
## [1] 1949770638
# ASE 1949770638, too large, try to take a diff
# complete the model, test residual
ljung.wge(f_dposr_ct$res,p=8,q=2)
## Obs -0.01394181 -0.01704525 -0.01386465 -0.01703005 0.005111981 -0.009188232 -0.02885949 0.004736653 -0.0180852 0.04396489 -0.03633083 -0.03272244 0.0008131524 0.06869748 -0.005879667 -0.03381658 0.01973066 0.07871697 -0.1226284 0.0004623474 0.01575949 0.03214822 -0.03849347 -0.009459984
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 5.742086
## 
## $df
## [1] 14
## 
## $pval
## [1] 0.9725793
# pval = 0.9725793
plotts.wge(f_dposr_ct$res)

# forecast out 
f_dposr_ct_7 <- fore.aruma.wge(dposr, phi = estaic_ct$phi,theta = estaic_ct$theta, n.ahead = 7, lastn= F, limits=T )

f_dposr_ct_90 <- fore.aruma.wge(dposr, phi = estaic_ct$phi,theta = estaic_ct$theta, n.ahead = 90, lastn= F, limits=T )

# trial run, late decide to run data from May 03
# get the test positive rate
# us4 dataset contains all variables needed for both Uni and multi variant analysis, already  sequence reversed, older one first.
dposrater_may = us4$posrate
plotts.sample.wge(dposrater_may)

## $autplt
##  [1]  1.000000000  0.793243460  0.775540019  0.685701466  0.657283710
##  [6]  0.570114234  0.551260793  0.463950606  0.455998446  0.371238640
## [11]  0.323622942  0.243563452  0.204482178  0.152947322  0.142972844
## [16]  0.061880862 -0.009661695 -0.005742598 -0.040030206 -0.120922069
## [21] -0.146085583 -0.172481197 -0.216397590 -0.237802101 -0.300278105
## [26] -0.320390970
## 
## $freq
##  [1] 0.01369863 0.02739726 0.04109589 0.05479452 0.06849315 0.08219178
##  [7] 0.09589041 0.10958904 0.12328767 0.13698630 0.15068493 0.16438356
## [13] 0.17808219 0.19178082 0.20547945 0.21917808 0.23287671 0.24657534
## [19] 0.26027397 0.27397260 0.28767123 0.30136986 0.31506849 0.32876712
## [25] 0.34246575 0.35616438 0.36986301 0.38356164 0.39726027 0.41095890
## [31] 0.42465753 0.43835616 0.45205479 0.46575342 0.47945205 0.49315068
## 
## $db
##  [1]  14.6628314  -1.6736907  -0.9266359 -13.5179982 -14.7748931  -4.7699180
##  [7]  -7.9912762  -7.2133867  -2.4763669  -8.3631330  -5.1740948  -6.4803604
## [13] -21.5733796 -10.0005043 -11.1183279  -1.7859093 -29.3217186 -15.6758582
## [19] -12.8292962 -16.5949786  -3.3162462 -14.9177477 -13.9091540 -10.4959331
## [25]  -6.7994221  -9.8619573 -11.6744305  -8.6133321  -4.2755790  -9.9373453
## [31]  -9.8738467  -7.8602514 -12.7419835  -8.2505300 -14.8437007  -1.6814680
## 
## $dbz
##  [1]  8.9698701  8.1680800  6.8363320  5.0009416  2.7544483  0.3412567
##  [7] -1.7935859 -3.2673252 -4.1706963 -4.8589492 -5.5349957 -6.1993758
## [13] -6.7535150 -7.1066131 -7.2645159 -7.3334652 -7.4303057 -7.6018727
## [19] -7.8158662 -8.0046486 -8.1150614 -8.1261203 -8.0434268 -7.9020723
## [25] -7.7696559 -7.7228794 -7.8076870 -8.0090148 -8.2362330 -8.3257131
## [31] -8.0933503 -7.4650627 -6.5695468 -5.6529713 -4.9335321 -4.5446295
# straight to ARMA
aic5.wge(dposrater_may,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic
##       p    q        aic
## 38    7    2 -0.3104626
## 52   10    1 -0.3080242
## 13    2    2 -0.2761237
## 51   10    0 -0.2540448
## 18    3    2 -0.2493179
# pick 7,2, AIC -9.5 vs. -7.4 on mar data start
aic5.wge(dposrater_may,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  bic
##       p    q        bic
## 7     1    1 -0.1463971
## 11    2    0 -0.1393156
## 13    2    2 -0.1192430
## 8     1    2 -0.1034945
## 12    2    1 -0.1000530
# pick 1, 1, BIC=-9.3567
# estimate aic pick model phi and theta
estaic_may <- est.arma.wge(dposrater_may, p=7, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## 0.6672 0.7216 -0.2318 -0.0038 0.0350 0.0791 -0.2982 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9829B+0.9894B^2    1.0021+-0.0810i      0.9947       0.0128
## 1+0.9249B             -1.0812               0.9249       0.5000
## 1+0.9977B+0.5764B^2   -0.8654+-0.9929i      0.7592       0.3641
## 1-0.6068B+0.5654B^2    0.5366+-1.2168i      0.7519       0.1839
##   
## 
f_dposr_may <- fore.aruma.wge(dposrater_may, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_dposrater_may = mean((f_dposr_may$f - dposrater_may[(length(dposrater_may)-20+1):length(dposrater_may)])^2)
ase_dposrater_may
## [1] 3.689831
# ASE 3.689831
# complete the model, test residual
ljung.wge(f_dposr_may$res,p=7,q=2)
## Obs -0.01046769 -0.03701726 0.02093265 0.004374793 0.08179569 -0.03834407 0.1781997 0.03366438 -0.0241363 0.001220277 -0.1151678 -0.1432698 0.01303198 0.2018446 0.02189474 -0.2666775 0.0101408 0.04010955 -0.1782199 -0.06381621 0.04755497 -0.04394528 0.06636473 -0.04428351
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 22.23227
## 
## $df
## [1] 15
## 
## $pval
## [1] 0.1018566
# pval = 0.1018566
plotts.wge(f_dposr_may$res)

# forecast out 
f_dposr_may_7 <- fore.aruma.wge(dposrater_may, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = 7, lastn= F, limits=T )

f_dposr_may_90 <- fore.aruma.wge(dposrater_may, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = 90, lastn= F, limits=T )

#not too happy, but does do pretty good job on long term, model of choice for us long term without MLP
us5 = ts(us4$posrate)
# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()
MeanHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
    forecasts_aruma = fore.aruma.wge(dposrater_may[i:(i+(trainingSize-1))], d=0, phi = estaic_may$phi,theta = estaic_may$theta, n.ahead = horizon)
   
  ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts_aruma$f)^2)
  
  ASEHolder[i] = ASE
  MeanHolder[i] = forecasts_aruma$f
}
## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

## Warning in MeanHolder[i] <- forecasts_aruma$f: number of items to replace is not
## a multiple of replacement length

ASEHolder #  taking 10 mins to run
##  [1]  5.425324 29.145427 12.978260  3.377284  2.265916  1.044848  1.040106
##  [8]  5.505741  9.146799  7.417582  8.226351  2.577548  1.385351  2.279227
## [15]  2.751509  5.012060  1.180703
MeanHolder
##  [1] 4.881010 3.573048 4.848994 5.986529 6.222015 7.134091 7.048535 6.286977
##  [9] 5.695387 6.909940 6.506387 7.289995 7.659611 7.781025 7.727248 7.554135
## [17] 8.108002
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.040   2.266   3.377   5.927   7.418  29.145
WindowedASE #5.927061
## [1] 5.927061
# try diff once
dposrater_may_d1 <- artrans.wge(dposrater_may,phi.tr = 1)

aic5.wge(dposrater_may_d1,p=0:15,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 4 2 
## Error in aic calculation at 15 0 
## Error in aic calculation at 15 1 
## Five Smallest Values of  aic
##       p    q        aic
## 73   14    2 -0.6180425
## 78   15    2 -0.6028138
## 79   15    3 -0.4969102
## 68   13    2 -0.4967744
## 66   13    0 -0.3751314
# pick 14, 2, AIC=-9.8, tiny larger. not worth the effort of extra degrees

# estimate d1 pick model phi and theta
estaic_may_d1 <- est.arma.wge(dposrater_may_d1, p=14, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## -1.3939 -1.4922 -0.9023 -0.3904 0.0533 0.5522 0.8376 1.1217 0.9843 0.8481 0.2600 -0.1777 -0.6017 -0.3647 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.0904B+0.9744B^2   -0.5595+-0.8445i      0.9871       0.3431
## 1+0.4407B+0.9620B^2   -0.2291+-0.9935i      0.9808       0.2861
## 1-1.2546B+0.9616B^2    0.6524+-0.7838i      0.9806       0.1395
## 1-0.4112B+0.9281B^2    0.2215+-1.0141i      0.9634       0.2158
## 1+0.9617B             -1.0398               0.9617       0.5000
## 1+1.5692B+0.9156B^2   -0.8569+-0.5982i      0.9569       0.4030
## 1-0.9180B              1.0893               0.9180       0.0000
## 1-0.7777B              1.2859               0.7777       0.0000
## 1+0.6934B             -1.4421               0.6934       0.5000
##   
## 
f_dposr_may_d1 <- fore.aruma.wge(dposrater_may, d=1,phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = 20, lastn= T, limits=F )

# looking good

# ASE
ase_dposrater_may_d1 = mean((f_dposr_may_d1$f - dposrater_may[(length(dposrater_may)-20+1):length(dposrater_may)])^2)
ase_dposrater_may_d1
## [1] 0.4512103
# ASE 0.4512103
# complete the model, test residual
ljung.wge(f_dposr_may_d1$res,p=14,q=2)
## Obs -0.06181709 -0.1193952 0.04077549 -0.02389429 -0.02705438 -0.01684004 -0.01841777 -0.1925159 -0.01702458 0.03846218 -0.1202376 0.1332637 0.03500123 0.002342406 0.1593079 0.09893685 -0.07997416 -0.1485467 -0.04379409 -0.03564445 -0.09591164 -0.1030234 0.1236619 -0.01867997
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 18.23846
## 
## $df
## [1] 8
## 
## $pval
## [1] 0.01950805
# pval = 0.01950805, not white noise
plotts.wge(f_dposr_may_d1$res)

# forecast out 
f_dposr_may_d1_7 <- fore.aruma.wge(dposrater_may, d=1, phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = 7, lastn= F, limits=T )

f_dposr_may_d1_90 <- fore.aruma.wge(dposrater_may, d=1, phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = 90, lastn= F, limits=T )

#short term looks good, but not long term. 

# chosen for short term us positive rate forecast, calculate rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
    forecasts = fore.aruma.wge(dposrater_may[i:(i+(trainingSize-1))], d=1, phi = estaic_may_d1$phi,theta = estaic_may_d1$theta, n.ahead = horizon)
   
  ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 0.5696323 0.5377582 1.0019927 0.4707286 0.5493673 0.9692764 0.3692563
##  [8] 0.9992082 1.1433232 0.8044940 0.6157031 0.5826998 0.6736881 1.0265688
## [15] 0.2698419 0.4965949 1.0957465
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2698  0.5378  0.6157  0.7162  0.9992  1.1433
WindowedASE #0.7162283
## [1] 0.7162283
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# try to see if diff make sense on count data
dposr_d1 <- artrans.wge(dposr, phi.tr = 1)

# looks good, now near 0 frequency is gone, higher frequency shows up. ACF show cyclic behavior
aic5.wge(dposr_d1,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 3 2 
## Error in aic calculation at 3 4 
## Error in aic calculation at 4 3 
## Error in aic calculation at 4 4 
## Error in aic calculation at 5 2 
## Error in aic calculation at 5 3 
## Error in aic calculation at 5 4 
## Error in aic calculation at 7 4 
## Error in aic calculation at 8 3 
## Error in aic calculation at 9 3 
## Error in aic calculation at 10 2 
## Five Smallest Values of  aic
##       p    q        aic
## 33    6    2   15.66430
## 38    7    2   15.67852
## 34    6    3   15.67977
## 39    7    3   15.68838
## 41    8    0   15.68977
# pick 6,2 AIC 15.66, not changing much
estaic_d1_ct <- est.arma.wge(dposr_d1, p=6, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## 0.9223 -0.4204 -0.1074 0.1150 -0.1663 0.4261 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.2326B+0.9650B^2    0.6387+-0.7927i      0.9823       0.1421
## 1-0.9140B              1.0941               0.9140       0.0000
## 1+0.7878B             -1.2693               0.7878       0.5000
## 1+0.4365B+0.6132B^2   -0.3560+-1.2264i      0.7831       0.2950
##   
## 
f_dposr_d1_ct <- fore.aruma.wge(dposr, d=1,phi = estaic_d1_ct$phi,theta = estaic_d1_ct$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_dposr_d1_ct = mean((f_dposr_d1_ct$f - dposr[length(f_dposr_d1_ct$f)-20+1:length(f_dposr_d1_ct$f)])^2)
ase_dposr_d1_ct
## [1] 1860690299
# 1860690299
# residual white noise test
ljung.wge(f_dposr_d1_ct$res,p=6,q=2)
## Obs -0.01040152 -0.0005907605 0.0005360688 -0.009483016 0.02118629 -0.01015082 0.01665095 0.03162182 -0.002579604 0.06602495 -0.0182375 -0.0342344 0.004088701 0.06722949 -0.003539669 -0.03814393 0.01140429 0.07346267 -0.1307832 -0.007597847 -0.001230379 0.02043867 -0.05278242 -0.02973383
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 6.331486
## 
## $df
## [1] 16
## 
## $pval
## [1] 0.9841029
# pval = 0.9841029, can't reject Ho which is white noise
# plot the residual
plotts.wge(f_dposr_d1_ct$res)

#forecast out
f_dposr_d1_ct_7 <- fore.aruma.wge(dposr, d=1,phi = estaic_d1_ct$phi,theta = estaic_d1_ct$theta, n.ahead = 7, lastn= F, limits=T )

f_dposr_d1_ct_90 <- fore.aruma.wge(dposr, d=1,phi = estaic_d1_ct$phi,theta = estaic_d1_ct$theta, n.ahead = 90, lastn= F, limits=T )

# give up on count analysis, project will focus on positive rate
#  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# try a 7 days cycle on count data
dposr_s7 <- artrans.wge(dposr, phi.tr = c(0,0,0,0,0,0,1))

# ACF damping fast
aic5.wge(dposr_s7,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 2 2 
## Error in aic calculation at 2 3 
## Error in aic calculation at 3 2 
## Error in aic calculation at 4 1 
## Error in aic calculation at 4 3 
## Error in aic calculation at 4 4 
## Error in aic calculation at 5 1 
## Error in aic calculation at 5 2 
## Error in aic calculation at 5 3 
## Error in aic calculation at 5 4 
## Error in aic calculation at 6 3 
## Error in aic calculation at 7 4 
## Error in aic calculation at 8 4 
## Five Smallest Values of  aic
##       p    q        aic
## 41    8    0   15.82318
## 37    7    1   15.83336
## 46    9    0   15.83697
## 39    7    3   15.83701
## 42    8    1   15.83741
# pick 8,0 AIC 15.82, not changing much
estaic_s7_ct <- est.arma.wge(dposr_s7, p=8, q=0, factor = T)
## 
## Coefficients of Original polynomial:  
## 0.6576 0.1678 0.0280 0.1679 -0.0174 0.1951 -0.5637 0.2718 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.9658B             -1.0354               0.9658       0.5000
## 1-0.9181B              1.0892               0.9181       0.0000
## 1+1.0820B+0.8127B^2   -0.6656+-0.8873i      0.9015       0.3524
## 1-0.4572B+0.7852B^2    0.2911+-1.0903i      0.8861       0.2085
## 1-1.3300B+0.4803B^2    1.3846+-0.4062i      0.6930       0.0454
##   
## 
f_dposr_s7_ct <- fore.aruma.wge(dposr, s=7,phi = estaic_s7_ct$phi,theta = estaic_s7_ct$theta, n.ahead = 20, lastn= T, limits=F )

#still not very good, much lower than original data
#ASE
ase_dposr_s7_ct = mean((f_dposr_s7_ct$f - dposr[length(f_dposr_s7_ct$f)-20+1:length(f_dposr_s7_ct$f)])^2)
ase_dposr_s7_ct
## [1] 1878125434
# 1878125434, not changing much
# residual white noise test
ljung.wge(f_dposr_s7_ct$res,p=8,q=0)
## Obs 0.01375532 -0.0008986191 -0.006465254 -0.02174787 0.02371531 0.04285527 -0.09327852 0.07453255 0.02729956 0.01647333 -0.02750212 0.03522911 0.08293654 -0.1403013 0.01589859 0.0296336 0.002374378 0.05289711 -0.0823588 -0.02831454 -0.02719726 -0.0117039 -0.03319747 0.002022641
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 9.103533
## 
## $df
## [1] 16
## 
## $pval
## [1] 0.9090886
# pval = 0.9090886, lower, still good. can't reject Ho which is white noise
# plot the residual
plotts.wge(f_dposr_s7_ct$res)

#forecast out
f_dposr_s7_ct_7 <- fore.aruma.wge(dposr, s=7,phi = estaic_s7_ct$phi,theta = estaic_s7_ct$theta, n.ahead = 7, lastn= F, limits=T )

f_dposr_s7_ct_90 <- fore.aruma.wge(dposr, s=7,phi = estaic_s7_ct$phi,theta = estaic_s7_ct$theta, n.ahead = 90, lastn= F, limits=T )

#################################### VAR, MLP, Rolling ASE for US ########################

#us3 
#us2$hospitalizedCurrently  0.722
#us2$deathIncrease    0.24
#us2$inIcuCurrently   0.205
# we'll move forward for multi variants analysis factor these 3 variables
ccf(us4$posrate,us4$hospitalized)

ccf(us4$posrate,us4$deathIncrease)

ccf(us4$posrate,us4$inIcu)

VARselect(us4,type='both',lag.max=16) #7, many Infs
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     14     14     14     14 
## 
## $criteria
##                   1            2            3            4            5
## AIC(n) 3.393755e+01 3.381777e+01 3.395491e+01 3.391049e+01 3.398697e+01
## HQ(n)  3.427186e+01 3.437496e+01 3.473498e+01 3.491344e+01 3.521280e+01
## SC(n)  3.479778e+01 3.525149e+01 3.596211e+01 3.649119e+01 3.714116e+01
## FPE(n) 5.498560e+14 4.934478e+14 5.810818e+14 5.833917e+14 6.817288e+14
##                   6            7            8            9           10
## AIC(n) 3.379562e+01 3.376441e+01 3.379794e+01 3.335921e+01 3.242349e+01
## HQ(n)  3.524432e+01 3.543599e+01 3.569239e+01 3.547654e+01 3.476370e+01
## SC(n)  3.752329e+01 3.806557e+01 3.867259e+01 3.880735e+01 3.844512e+01
## FPE(n) 6.358035e+14 7.374125e+14 9.887555e+14 9.276070e+14 6.301837e+14
##                  11           12            13   14   15   16
## AIC(n) 3.104646e+01 2.819272e+01 -13.162250113 -Inf -Inf -Inf
## HQ(n)  3.360954e+01 3.097869e+01 -10.153410500 -Inf -Inf -Inf
## SC(n)  3.764157e+01 3.536132e+01  -5.420161098 -Inf -Inf -Inf
## FPE(n) 3.674662e+14 8.578404e+13   0.001840854    0    0    0
lsfit_us4=VAR(us4, p=8, type ='both')###############

# short term forecast VAR US
preds_us4=predict(lsfit_us4,n.ahead=7)
preds_us4$fcst$posrate[1:7,1]
## [1] 10.177700 11.510468  7.554877  9.033158  7.683985  7.470714  7.032057
#fanchart(preds_us4)
# making graph for 8 ahead prediction
#us4_predgraph = c(us4$posrate,rep('na',7))
plot(us4$posrate, type="o",xlim=c(1,81),ylim=c(-6,15))
points(seq(74,80,1),preds_us4$fcst$posrate[1:7,1], type='o',col="blue")

# long term forecast VAR US
preds_us4L=predict(lsfit_us4,n.ahead=90)
preds_us4L$fcst$posrate[1:90,1]
##  [1]   10.1777004   11.5104676    7.5548772    9.0331583    7.6839851
##  [6]    7.4707137    7.0320572    7.1980868    9.1008139    9.4804441
## [11]    8.2259180    8.1660903    6.0949405    4.1022494    3.2807808
## [16]    1.6878357    3.8696523    4.6755316    5.1433971    5.4077484
## [21]    2.8775605   -0.5807645   -4.2229234   -7.4240941   -6.8022557
## [26]   -5.4083456   -3.1302017   -0.6095270   -2.2620118   -6.2219996
## [31]  -13.1515617  -20.3006638  -23.5869966  -23.7022419  -19.7192221
## [36]  -13.5690574  -10.6946722  -12.3164181  -20.7368168  -33.2876307
## [41]  -44.1329109  -50.5899560  -48.1919268  -38.3716289  -27.1388097
## [46]  -20.0782161  -24.0957446  -39.5683759  -60.9272607  -80.9718260
## [51]  -89.0429999  -81.0711788  -60.6410955  -36.8058252  -24.1992224
## [56]  -31.7396542  -59.9117578  -99.8513103 -132.8910845 -142.9411182
## [61] -122.8899937  -79.1785383  -33.4728716   -9.7525900  -25.1846549
## [66]  -79.9259122 -151.8654693 -208.3298847 -218.7448997 -171.3330278
## [71]  -83.9062392    4.0312521   45.6383331   10.7470656  -94.2740127
## [76] -227.1778047 -323.6129935 -328.2104501 -225.6841714  -52.4039578
## [81]  111.9007323  180.1599079  102.8339932 -102.8005632 -348.9702796
## [86] -513.5071046 -498.3468431 -282.2752258   53.8937062  355.1147335
# making graph for 90 ahead prediction
plot(us4$posrate, type="o",xlim=c(1,164),ylim=c(-20,15))
points(seq(74,163,1),preds_us4L$fcst$posrate[1:90,1], type='o',col="blue")

# looks only following the upwards trend, not good.

#try to put few variables in exogen instead######################################################
us4_var1 = data.frame(hospitalized=us4$hospitalized,posrate=us4$posrate)
us4_var_exo1 = data.frame(deathIncrease=us4$deathIncrease,inIcu=us4$inIcu)
# exclude deathIncrease, forecast inlcude too many negative value
us4_var_exo2 = data.frame(inIcu=us4$inIcu)

VARselect(us4_var1,type='both',exogen=us4_var_exo2)
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2 
## 
## $criteria
##                   1            2            3            4            5
## AIC(n)     12.99685     12.84292     12.89945     12.96475     12.96783
## HQ(n)      13.13065     13.03023     13.14028     13.25910     13.31570
## SC(n)      13.33703     13.31917     13.51177     13.71315     13.85230
## FPE(n) 441317.45556 378798.13571 401669.20174 430189.38883 433589.89452
##                   6            7            8            9           10
## AIC(n)     12.84663     12.90337     12.94491     13.00587     12.99910
## HQ(n)      13.24801     13.35827     13.45333     13.56780     13.61456
## SC(n)      13.86717     14.05998     14.23760     14.43462     14.56393
## FPE(n) 386645.57666 412824.56086 435227.45547 469219.78481 474360.40778
# AIC 2, SC 2
lsfit_us4_var1=VAR(us4_var1, p=2, type ='both',exogen=us4_var_exo2)

# short term forecast VAR1 US 
#dummy exogen for forecast, we did forecast using univariant MLP for all variables
hospitalized = ts(us4$hospitalized)
fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized7=forecast(fitmlp_hospitalized7,h=7)
plot(f_fitmlp_hospitalized7)

hospitalized7 = ts(c(us4$hospitalized, f_fitmlp_hospitalized7$mean))

deathIncrease = ts(us4$deathIncrease)
fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease7=forecast(fitmlp_deathIncrease7,h=7)
plot(f_fitmlp_deathIncrease7)

deathIncrease7 = ts(c(us4$deathIncrease, f_fitmlp_deathIncrease7$mean))# contain negative value in forecast, make no sense

inIcu = ts(us4$inIcu)
fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu7=forecast(fitmlp_inIcu7,h=7)
plot(f_fitmlp_inIcu7)

inIcu7 = ts(c(us4$inIcu, f_fitmlp_inIcu7$mean))

us6_var1_dummy1 = data.frame(inIcu=f_fitmlp_inIcu7$mean,deathIncrease=f_fitmlp_deathIncrease7$mean)
us6_var1_dummy2 = data.frame(inIcu=f_fitmlp_inIcu7$mean)

preds_us4_var1=predict(lsfit_us4_var1,n.ahead=7,dumvar= us6_var1_dummy2)
preds_us4_var1$fcst$posrate[1:7,1]
## [1] 8.307927 8.802843 8.856643 8.916684 8.913726 8.839815 8.781388
# all negative forecast either with or without deathIncrease in exogen, doesn't make sense. try to not use any exogen
VARselect(us4_var1,type='both')
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      6      6      2      6 
## 
## $criteria
##                   1            2            3            4            5
## AIC(n)     13.41537     13.19125     13.30293     13.26517     13.32024
## HQ(n)      13.52240     13.35181     13.51700     13.53276     13.64135
## SC(n)      13.68751     13.59947     13.84722     13.94553     14.13668
## FPE(n) 670451.96151 536279.38260 600603.14043 579883.51451 615176.14008
##                   6            7            8            9           10
## AIC(n)     12.96369     13.01147     13.04878     13.11047     13.11324
## HQ(n)      13.33832     13.43961     13.53044     13.64565     13.70194
## SC(n)      13.91619     14.10005     14.27343     14.47119     14.61003
## FPE(n) 433120.76486 457805.29001 479976.81921 517059.87816 526798.63612
# AIC 6, SC 2
lsfit_us4_var1=VAR(us4_var1, p=6, type ='both')

# short term forecast VAR1 US 
preds_us4_var1=predict(lsfit_us4_var1,n.ahead=7)
preds_us4_var1$fcst$posrate[1:7,1]
## [1] 6.806429 8.781841 8.257319 8.120434 7.739955 7.970919 6.653913
# making graph for 7 ahead prediction
plot(us4_var1$posrate, type="o",xlim=c(1,81),ylim=c(1,15))
points(seq(74,80,1),preds_us4_var1$fcst$posrate[1:7,1], type='o',col="blue")

# long term forecast VAR US
preds_us4L_var1=predict(lsfit_us4_var1,n.ahead=90)
preds_us4L_var1$fcst$posrate[1:90,1]
##  [1]    6.8064291    8.7818408    8.2573192    8.1204337    7.7399552
##  [6]    7.9709193    6.6539134    6.6929143    6.5985146    6.3767170
## [11]    6.0949278    5.8333064    5.3532196    4.5397360    4.0446575
## [16]    3.4206201    2.9813754    2.2690500    1.8499865    1.0324457
## [21]    0.2106176   -0.7451058   -1.5912829   -2.6459346   -3.5643894
## [26]   -4.5273223   -5.6046888   -6.7824202   -8.0161039   -9.3494171
## [31]  -10.7753136  -12.1705204  -13.6430699  -15.1568064  -16.7735223
## [36]  -18.4436695  -20.2549121  -22.1254575  -24.0758632  -26.0719063
## [41]  -28.1540137  -30.2941554  -32.5280900  -34.8658841  -37.3031521
## [46]  -39.8290805  -42.4425486  -45.1435547  -47.9192300  -50.7931278
## [51]  -53.7662611  -56.8487731  -60.0332772  -63.3291997  -66.7216918
## [56]  -70.2147667  -73.8080892  -77.5108606  -81.3236668  -85.2539243
## [61]  -89.3022331  -93.4663524  -97.7447216 -102.1377213 -106.6478035
## [66] -111.2766709 -116.0304294 -120.9104824 -125.9185104 -131.0531822
## [71] -136.3157336 -141.7050159 -147.2235124 -152.8734709 -158.6579681
## [76] -164.5782515 -170.6357650 -176.8306175 -183.1627205 -189.6327509
## [81] -196.2420398 -202.9923897 -209.8853220 -216.9224786 -224.1043462
## [86] -231.4313118 -238.9034546 -246.5214634 -254.2859770 -262.1982165
# making graph for 90 ahead prediction
plot(us4_var1$posrate, type="o",xlim=c(1,164),ylim=c(-5,11))
points(seq(74,163,1),preds_us4L_var1$fcst$posrate[1:90,1], type='o',col="blue")

# set negative value to 0, plot better graph for visualization
for (i in 1:length(preds_us4L_var1$fcst$posrate)) {
    if (preds_us4L_var1$fcst$posrate[i]<0.0) preds_us4L_var1$fcst$posrate[i]=0.0
    }

plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),preds_us4L_var1$fcst$posrate[1:90,1],col = "blue")

# Rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
  
    usp = us4_var1[i:(i+trainingSize),]
    lsfit_us4_var1 = VAR(usp,p=6)
    forecasts = predict(lsfit_us4_var1,n.ahead = horizon)
  
  ASE = mean((us4_var1$posrate[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$fcst$posrate)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder
##  [1]  9.055827 10.101641 10.687193 10.924849 12.169259 11.624482 11.228665
##  [8] 12.814493 13.097033 13.808635 13.992837 14.199188 15.651549 15.768550
## [15] 15.306078 15.233905 14.224059
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   9.056  11.229  13.097  12.935  14.224  15.769
WindowedASE #12.9346
## [1] 12.9346
us5 = ts(us4$posrate)
library(nnfor)
set.seed(5)
fitmlp_usu=mlp(us5,rep=100)

# short term forecast MLP uni US
f_fitmlp_usu=forecast(fitmlp_usu,h=7)
plot(f_fitmlp_usu)

# long term forecasst MLP uni us
f_fitmlp_usuL=forecast(fitmlp_usu,h=90)
plot(f_fitmlp_usuL)

# graph better long term forecast with mean only
# there are some negative rate which does not make sense. normalize nagative to 0

for (i in 1:length(f_fitmlp_usuL$mean)) {
    if (f_fitmlp_usuL$mean[i]<0.0) f_fitmlp_usuL$mean[i]=0.0
    }

plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),f_fitmlp_usuL$mean,col = "blue")

# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
  
    usp = ts(us5[i:(i+trainingSize)])
    fitmlp_us5 = mlp(usp,rep=25)
    forecasts = forecast(fitmlp_us5,h = horizon)
  
  ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 1.3385616 0.3610328 1.5497740 1.3087507 1.7626256 1.9780489 1.8837602
##  [8] 2.6597329 1.3403879 1.6741456 2.1399284 1.1686850 2.8522299 0.5572741
## [15] 0.9798475 1.7451546 4.7060941
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.361   1.309   1.674   1.765   1.978   4.706
WindowedASE #1.765061
## [1] 1.765061
#short term
# choose ARIMA(14,1,2), no ensemble. MLP sequentially decreasing, so is also ARMA(7.2)

# long term
ensemble_usL_uni = (f_fitmlp_usuL$mean + f_dposr_may_d1_90$f + f_dposr_may_90$f)/3
#8.255756 10.020159 10.202144 11.270059 10.230565 10.281909  9.750516
plot(us5, type = "l",xlim=c(1,163),ylim=c(1,12))
lines(seq(74,163,1),ensemble_usL_uni,col = "blue")

set.seed(5)
# forecast regressor value in order to MLP posrate forecast into the future.. short term
hospitalized = ts(us4$hospitalized)
fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized7=forecast(fitmlp_hospitalized7,h=7)
plot(f_fitmlp_hospitalized7)

hospitalized7 = ts(c(us4$hospitalized, f_fitmlp_hospitalized7$mean))

deathIncrease = ts(us4$deathIncrease)
fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease7=forecast(fitmlp_deathIncrease7,h=7)
plot(f_fitmlp_deathIncrease7)

deathIncrease7 = ts(c(us4$deathIncrease, f_fitmlp_deathIncrease7$mean))# contain negative value in forecast, make no sense

inIcu = ts(us4$inIcu)
fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu7=forecast(fitmlp_inIcu7,h=7)
plot(f_fitmlp_inIcu7)

inIcu7 = ts(c(us4$inIcu, f_fitmlp_inIcu7$mean))
# setup regressor df
us6 = data.frame(hospitalized=hospitalized7,deathIncrease=deathIncrease7,inIcu=inIcu7)
us6_mlp1 = data.frame(hospitalized=hospitalized7)

fitmlp_usm=mlp(us5,rep=30, xreg=us6_mlp1)
fitmlp_usm
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,2)
## Forecast combined using the median operator.
## MSE: 0.115.
f_fitmlp_usm=forecast(fitmlp_usm,h=7,xreg =us6_mlp1)
plot(f_fitmlp_usm)

# forecast regressor value in order to MLP posrate forecast into the future.. long term
#hospitalized = ts(us4$hospitalized)
#fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized90=forecast(fitmlp_hospitalized7,h=90)
plot(f_fitmlp_hospitalized90)

hospitalized90 = ts(c(us4$hospitalized, f_fitmlp_hospitalized90$mean))

#deathIncrease = ts(us4$deathIncrease)
#fitmlp_deathIncrease90=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease90=forecast(fitmlp_deathIncrease7,h=90)
plot(f_fitmlp_deathIncrease90)

deathIncrease90 = ts(c(us4$deathIncrease, f_fitmlp_deathIncrease90$mean)) # consider exclude this, too many negative values

#inIcu = ts(us4$inIcu)
#fitmlp_inIcu90=mlp(inIcu,rep=30)
f_fitmlp_inIcu90=forecast(fitmlp_inIcu7,h=90)
plot(f_fitmlp_inIcu90)

inIcu90 = ts(c(us4$inIcu, f_fitmlp_inIcu90$mean))
# setup regressor df
us6L = data.frame(hospitalized=hospitalized90,deathIncrease=deathIncrease90,inIcu=inIcu90)
us6L_mlp1 = data.frame(hospitalized=hospitalized90)

set.seed(5)
fitmlp_usmL=mlp(us5,rep=30, xreg=us6L_mlp1)
fitmlp_usmL
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,2)
## Forecast combined using the median operator.
## MSE: 0.1481.
f_fitmlp_usmL=forecast(fitmlp_usmL,h=90,xreg =us6L_mlp1)
plot(f_fitmlp_usmL)

# set negative value to 0, plot better graph for visilization
for (i in 1:length(f_fitmlp_usmL$mean)) {
    if (f_fitmlp_usmL$mean[i]<0.0) f_fitmlp_usmL$mean[i]=0.0
    }

plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),f_fitmlp_usmL$mean,col = "blue")

# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
  
    usp = ts(us5[i:(i+trainingSize)])
    usR = us6L[i:(i+trainingSize+horizon),]
    
    fitmlp_us6 = mlp(usp,rep=30,xreg=usR)
    forecasts = forecast(fitmlp_us6,h = horizon,xreg=usR)
  
  ASE = mean((us5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 2.4239388 1.5049129 2.2006868 4.1302711 0.7919529 0.9542558 1.3803651
##  [8] 1.1102737 1.6147460 0.8132092 2.5351754 0.8650088 2.1077007 0.5737346
## [15] 1.9161855 1.7809161 9.3917114
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5737  0.9543  1.6147  2.1232  2.2007  9.3917
WindowedASE #1.949447
## [1] 2.123238
#short term
ensemble_us = (f_fitmlp_usm$mean + preds_us4_var1$fcst$posrate[,1])/2

plot(us5, type = "l",xlim=c(1,81),ylim=c(1,12))
lines(seq(74,80,1),ensemble_us,col = "blue")

# long term
ensemble_usL = (f_fitmlp_usmL$mean + preds_us4L_var1$fcst$posrate[,1]+ ensemble_usL_uni)/3

plot(us5, type = "l",xlim=c(1,163),ylim=c(-1,12))
lines(seq(74,163,1),ensemble_usL,col = "blue")

*****************start to look into Illinois data *********************************

IL<-read.csv("/Data Folder/University/SMU/R for Time Series/Project/IL_daily.csv",header=T)
head(IL,2)
##       date state positive negative pending hospitalizedCurrently
## 1 20200715    IL   157825  1922908      NA                  1454
## 2 20200714    IL   156638  1885934      NA                  1416
##   hospitalizedCumulative inIcuCurrently inIcuCumulative onVentilatorCurrently
## 1                     NA            324              NA                   130
## 2                     NA            333              NA                   126
##   onVentilatorCumulative recovered dataQualityGrade   lastUpdateEt
## 1                     NA        NA                A 7/15/2020 0:00
## 2                     NA        NA                A 7/14/2020 0:00
##           dateModified     checkTimeEt death hospitalized          dateChecked
## 1 2020-07-15T00:00:00Z 7/14/2020 20:00  7427           NA 2020-07-15T00:00:00Z
## 2 2020-07-14T00:00:00Z 7/13/2020 20:00  7419           NA 2020-07-14T00:00:00Z
##   totalTestsViral positiveTestsViral negativeTestsViral positiveCasesViral
## 1         2079601                 NA                 NA             156693
## 2         2041440                 NA                 NA             155506
##   deathConfirmed deathProbable fips positiveIncrease negativeIncrease   total
## 1           7226           201   17             1187            36974 2080733
## 2           7218           201   17              707            27739 2042572
##   totalTestResults totalTestResultsIncrease  posNeg deathIncrease
## 1          2080733                    38161 2080733             8
## 2          2042572                    28446 2042572            25
##   hospitalizedIncrease                                     hash commercialScore
## 1                    0 c26e2751113806a83fd89ca8d7ccce0bedb527d0               0
## 2                    0 d735a7b1d033561a1bf7171d2483d999957e3e4a               0
##   negativeRegularScore negativeScore positiveScore score grade
## 1                    0             0             0     0    NA
## 2                    0             0             0     0    NA
# to get to daily test positive count
plotts.wge(IL$positiveIncrease)

# exclude early days only sick people gets tested
IL1<-subset.data.frame(IL,date>20200313)
IL2<-subset.data.frame(IL,date>20200503)

plotts.wge(IL1$positiveIncrease)

summary.data.frame(us1)
##       date              states         positive          negative       
##  Min.   :20200304   Min.   :15.00   Min.   :    757   Min.   :    1180  
##  1st Qu.:20200406   1st Qu.:56.00   1st Qu.: 380919   1st Qu.: 1615827  
##  Median :20200510   Median :56.00   Median :1324147   Median : 7543696  
##  Mean   :20200496   Mean   :54.93   Mean   :1320544   Mean   :11839006  
##  3rd Qu.:20200612   3rd Qu.:56.00   3rd Qu.:2037000   3rd Qu.:20403963  
##  Max.   :20200715   Max.   :56.00   Max.   :3478419   Max.   :39042608  
##                                                                         
##     pending      hospitalizedCurrently hospitalizedCumulative inIcuCurrently 
##  Min.   :  103   Min.   :  325         Min.   :     4         Min.   : 1299  
##  1st Qu.: 1894   1st Qu.:29749         1st Qu.: 49362         1st Qu.: 5627  
##  Median : 2758   Median :37750         Median :156192         Median : 7497  
##  Mean   : 8594   Mean   :37465         Mean   :137644         Mean   : 8580  
##  3rd Qu.: 4202   3rd Qu.:51020         3rd Qu.:223427         3rd Qu.:12166  
##  Max.   :65709   Max.   :59539         Max.   :269507         Max.   :15130  
##                  NA's   :13                                   NA's   :22     
##  inIcuCumulative onVentilatorCurrently onVentilatorCumulative   recovered      
##  Min.   :   74   Min.   : 167          Min.   :  39.0         Min.   :     97  
##  1st Qu.: 2370   1st Qu.:2197          1st Qu.: 227.0         1st Qu.:  92978  
##  Median : 7319   Median :3749          Median : 638.5         Median : 294312  
##  Mean   : 6388   Mean   :3655          Mean   : 616.3         Mean   : 400402  
##  3rd Qu.: 9665   3rd Qu.:5187          3rd Qu.: 880.2         3rd Qu.: 680916  
##  Max.   :12002   Max.   :7070          Max.   :1166.0         Max.   :1075882  
##  NA's   :21      NA's   :21            NA's   :28             NA's   :21       
##  dateChecked            death         hospitalized    lastModified      
##  Length:134         Min.   :    16   Min.   :     4   Length:134        
##  Class :character   1st Qu.: 12080   1st Qu.: 49362   Class :character  
##  Mode  :character   Median : 74493   Median :156192   Mode  :character  
##                     Mean   : 64541   Mean   :137644                     
##                     3rd Qu.:108200   3rd Qu.:223427                     
##                     Max.   :129595   Max.   :269507                     
##                                                                         
##      total          totalTestResults       posNeg         deathIncrease   
##  Min.   :    2040   Min.   :    1937   Min.   :    1937   Min.   :   1.0  
##  1st Qu.: 2013846   1st Qu.: 1996746   1st Qu.: 1996746   1st Qu.: 412.2  
##  Median : 8870918   Median : 8867843   Median : 8867843   Median : 854.5  
##  Mean   :13168144   Mean   :13159550   Mean   :13159550   Mean   : 967.0  
##  3rd Qu.:22442754   3rd Qu.:22440962   3rd Qu.:22440962   3rd Qu.:1438.0  
##  Max.   :42523974   Max.   :42521027   Max.   :42521027   Max.   :2740.0  
##                                                                           
##  hospitalizedIncrease negativeIncrease positiveIncrease
##  Min.   :-2841.0      Min.   :   464   Min.   :  141   
##  1st Qu.:  982.5      1st Qu.:107276   1st Qu.:19789   
##  Median : 1621.5      Median :280023   Median :24760   
##  Mean   : 2011.2      Mean   :291361   Mean   :25954   
##  3rd Qu.: 2652.2      3rd Qu.:457737   3rd Qu.:31015   
##  Max.   :17320.0      Max.   :756730   Max.   :66645   
##                                                        
##  totalTestResultsIncrease     hash          
##  Min.   :   617           Length:134        
##  1st Qu.:136983           Class :character  
##  Median :303788           Mode  :character  
##  Mean   :317315                             
##  3rd Qu.:478845                             
##  Max.   :823375                             
## 
il_dpos <- IL1$positiveIncrease
length(il_dpos)
## [1] 124
#select all possible correlated variables for daily test positive rate
library(tidyverse)
date = rev(IL2$date)
hospitalized=rev(IL2$hospitalizedCurrently)
inIcu=rev(IL2$inIcuCurrently)
onVent=rev(IL2$onVentilatorCurrently)
recovered=rev(IL2$recovered)
deathIncrease=rev(IL2$deathIncrease)
negativeIncrease=rev(IL2$negativeIncrease)
hospitalizedIncrease=rev(IL2$hospitalizedIncrease)
pending=rev(IL2$pending)

IL3 = data.frame (hospitalized,inIcu,onVent,deathIncrease,negativeIncrease,hospitalizedIncrease)       
IL3$posrate <- rev((IL2$positiveIncrease/IL2$totalTestResultsIncrease)*100)

library(GGally)
ggpairs(IL3) # too busy, looks many high corr, make a subset of IL3
## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

IL3_corr = data.frame(posrate=IL3$posrate,hospitalized=IL3$hospitalized,inIcu=IL3$inIcu,onVent=IL3$onVent,deathIncrease=IL3$deathIncrease,negativeIncrease=IL3$negativeIncrease)
ggpairs(IL3_corr)

#hospitalizedCurrently  0.91
#deathIncrease    0.634
#inIcuCurrently   0.882
#onVent                 0.872
#negativeIncrease       -0.654

# we'll move forward for multivariant analysis factor these 5 variables
# clean dataset ready for modeling analaysis
IL4 = IL3_corr
summary(IL4)
##     posrate        hospitalized      inIcu            onVent     
##  Min.   : 1.821   Min.   :1326   Min.   : 304.0   Min.   :126.0  
##  1st Qu.: 2.814   1st Qu.:1560   1st Qu.: 399.0   1st Qu.:216.0  
##  Median : 4.013   Median :2496   Median : 713.0   Median :437.0  
##  Mean   : 6.067   Mean   :2760   Mean   : 733.9   Mean   :427.4  
##  3rd Qu.: 9.060   3rd Qu.:3914   3rd Qu.:1031.0   3rd Qu.:607.0  
##  Max.   :16.922   Max.   :4862   Max.   :1266.0   Max.   :780.0  
##  deathIncrease    negativeIncrease
##  Min.   :  0.00   Min.   :11017   
##  1st Qu.: 25.00   1st Qu.:18561   
##  Median : 57.00   Median :21912   
##  Mean   : 65.88   Mean   :22810   
##  3rd Qu.: 96.00   3rd Qu.:27039   
##  Max.   :198.00   Max.   :37940
length(IL4$posrate)#73
## [1] 73
is.na.data.frame(IL4)# no missing value
##       posrate hospitalized inIcu onVent deathIncrease negativeIncrease
##  [1,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [2,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [3,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [4,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [5,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [6,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [7,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [8,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
##  [9,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [10,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [11,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [12,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [13,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [14,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [15,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [16,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [17,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [18,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [19,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [20,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [21,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [22,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [23,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [24,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [25,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [26,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [27,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [28,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [29,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [30,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [31,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [32,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [33,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [34,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [35,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [36,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [37,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [38,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [39,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [40,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [41,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [42,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [43,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [44,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [45,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [46,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [47,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [48,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [49,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [50,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [51,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [52,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [53,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [54,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [55,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [56,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [57,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [58,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [59,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [60,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [61,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [62,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [63,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [64,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [65,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [66,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [67,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [68,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [69,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [70,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [71,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [72,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
## [73,]   FALSE        FALSE FALSE  FALSE         FALSE            FALSE
# IL4 contains all the dta required for rest of model building and analysis
#Create a line chart for daily test positive and death count
# get the range for the x and y axis
xt=c(1:length(IL1$date))
xrange <- range(xt)
# get the test positive daily count in oldest first order
il_dposr<-rev(il_dpos)
yrange <- range(il_dposr)
# get the daily death count
il_dd <- IL1$deathIncrease
il_ddr<-rev(il_dd)
plotts.wge(il_ddr)

#il_ddr = c(il_ddr,rep('na',10))
# plot the graph
plot(xt, il_dposr, type="o", lwd=0.5,
    lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 14 to July 15)", ylab="Daily Test Positive / Death Count")
lines(xt, il_ddr, type="o", lwd=0.5,
    lty=2, col='red', pch=16,cex=0.4)

# add a title and subtitle
title("Daily State of Illinois Test Positive & Death Count")

# add a legend
legend(100, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)

#Create a line chart for daily test positive and death rate
# get the range for the x and y axis
xt=c(1:length(IL1$date))
xrange <- range(xt)
# get the test positive rate
dposrate_il1 <- IL1$positiveIncrease/(IL1$positiveIncrease+IL1$negativeIncrease)
dposrater_il1<-rev(dposrate_il1)
yrange <- range(dposrater_il1)
# get the death to positive rate
ddrate_il1 <- IL1$deathIncrease/IL1$positiveIncrease
ddrater_il1<-rev(ddrate_il1)

# plot the graph
plot(xt, dposrater_il1, type="o", lwd=0.5,
    lty=1, col='blue', pch=15,cex=0.4,xlab="Date (March 13 to July 15)", ylab="Daily Test Positive / Death Rate")
lines(xt, ddrater_il1, type="o", lwd=0.5,
    lty=2, col='red', pch=16,cex=0.4)

# add a title and subtitle
title("Daily Illinois Test Positive & Death Rate")

# add a legend
legend(100, yrange[2], legend=c("Positive", "Death"), cex=0.8, col=c("blue","red"), lty=1:2)

#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
plotts.sample.wge(il_dposr)

## $autplt
##  [1] 1.000000000 0.796412113 0.806828728 0.736799043 0.725236670 0.700315319
##  [7] 0.731508393 0.674156043 0.655915570 0.622998072 0.555249208 0.524297883
## [13] 0.477974050 0.453430523 0.433211698 0.385245848 0.353060413 0.301348967
## [19] 0.262968313 0.234679039 0.214123077 0.210215197 0.130791558 0.068548606
## [25] 0.039632961 0.001135416
## 
## $freq
##  [1] 0.008064516 0.016129032 0.024193548 0.032258065 0.040322581 0.048387097
##  [7] 0.056451613 0.064516129 0.072580645 0.080645161 0.088709677 0.096774194
## [13] 0.104838710 0.112903226 0.120967742 0.129032258 0.137096774 0.145161290
## [19] 0.153225806 0.161290323 0.169354839 0.177419355 0.185483871 0.193548387
## [25] 0.201612903 0.209677419 0.217741935 0.225806452 0.233870968 0.241935484
## [31] 0.250000000 0.258064516 0.266129032 0.274193548 0.282258065 0.290322581
## [37] 0.298387097 0.306451613 0.314516129 0.322580645 0.330645161 0.338709677
## [43] 0.346774194 0.354838710 0.362903226 0.370967742 0.379032258 0.387096774
## [49] 0.395161290 0.403225806 0.411290323 0.419354839 0.427419355 0.435483871
## [55] 0.443548387 0.451612903 0.459677419 0.467741935 0.475806452 0.483870968
## [61] 0.491935484 0.500000000
## 
## $db
##  [1]  16.23481380   7.36887552   0.22486668  -0.11267743   0.06735102
##  [6] -13.62918899  -3.31458032  -6.74445173 -12.86310627  -7.74797440
## [11]  -9.55876424  -3.76255134  -5.33791653  -3.99314378  -6.57628793
## [16]  -9.22084880  -3.35943839  -0.62576095 -11.04884186  -6.56841694
## [21] -18.14877986 -10.40638288  -8.62954959  -3.22832414  -8.08238954
## [26] -13.75991211 -11.95887210 -12.83400687  -5.24105725 -12.92896551
## [31]  -9.84344495 -17.49412875 -14.00836477 -16.11830620  -9.74842872
## [36]  -6.83788694  -6.85477339 -11.46349798 -12.09418720  -8.48971579
## [41]  -4.77777497  -3.15833709 -11.28433776 -18.55689837  -4.48439570
## [46]  -3.83873525 -10.56869637  -9.52313421 -15.52203060  -8.83351834
## [51] -16.13232064 -16.99545064  -5.24452238  -4.45771217  -9.05110156
## [56]  -3.87069848  -9.23999783  -4.18557574  -3.94053536  -4.94288821
## [61]  -6.58848732  -2.59515508
## 
## $dbz
##  [1]  10.5285511   9.9868452   9.0770422   7.7901591   6.1183074   4.0654590
##  [7]   1.6765936  -0.8912392  -3.2529921  -4.8378030  -5.4016026  -5.2839142
## [13]  -4.9068159  -4.5068098  -4.2035700  -4.0657608  -4.1275011  -4.3909252
## [19]  -4.8289712  -5.3898182  -6.0043338  -6.5998327  -7.1207948  -7.5483623
## [25]  -7.9046121  -8.2367144  -8.5915933  -8.9944633  -9.4362537  -9.8698375
## [31] -10.2175868 -10.3955513 -10.3503432 -10.0855265  -9.6552199  -9.1337358
## [37]  -8.5881618  -8.0669925  -7.6012936  -7.2111066  -6.9121571  -6.7202143
## [43]  -6.6519405  -6.7221359  -6.9377136  -7.2883434  -7.7335664  -8.1895519
## [49]  -8.5299234  -8.6260644  -8.4229759  -7.9778688  -7.4117763  -6.8355375
## [55]  -6.3162234  -5.8807002  -5.5309979  -5.2583496  -5.0526791  -4.9073770
## [61]  -4.8200636  -4.7908404
#damping faster compares to us
# try to work on positive count, rather than rate
aic5.wge(il_dposr,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic
##       p    q        aic
## 28    5    2   11.96898
## 35    6    4   11.96924
## 40    7    4   11.97535
## 45    8    4   11.97569
## 31    6    0   11.98079
# pick 5,2, AIC 11.96
aic5.wge(il_dposr,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  bic
##       p    q        bic
## 7     1    1   12.08159
## 11    2    0   12.10572
## 12    2    1   12.10711
## 8     1    2   12.11501
## 13    2    2   12.12728
# pick 1, 1? BIC=12.08
# estimate model phi and theta
il_estaic_ct <- est.arma.wge(il_dposr, p=5, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## 1.9096 -0.9278 -0.3548 0.3945 -0.0321 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9744B              1.0262               0.9744       0.0000
## 1-1.3814B+0.6814B^2    1.0136+-0.6634i      0.8255       0.0922
## 1+0.5365B             -1.8640               0.5365       0.5000
## 1-0.0902B              11.0805               0.0902       0.0000
##   
## 
f_il_dposr_ct <- fore.aruma.wge(il_dposr, phi = il_estaic_ct$phi,theta = il_estaic_ct$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_il_dposr_ct = mean((f_il_dposr_ct$f - il_dposr[length(f_il_dposr_ct$f)-20+1:length(f_il_dposr_ct$f)])^2)
ase_il_dposr_ct
## [1] 365406.4
# ASE 365406.4, too large, try to take a diff?
# complete the model, test residual
ljung.wge(f_il_dposr_ct$resid,p=5,q=2)
## Obs -0.01147759 0.01388252 0.0416049 -0.08473251 -0.07442198 0.1568715 -0.05467255 0.01164759 0.09721603 -0.05203082 0.02598788 -0.006662076 0.02082578 0.1139352 0.05008717 0.03741986 0.003962046 -0.03226451 -0.0615633 0.06745327 0.2628991 -0.02177301 -0.1217265 0.04606653
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 24.40802
## 
## $df
## [1] 17
## 
## $pval
## [1] 0.1087659
# pval = 0.1087659
plotts.wge(f_il_dposr_ct$res)

acf(f_il_dposr_ct$resid)

# forecast out 
f_il_dposr_ct_7 <- fore.aruma.wge(il_dposr, phi = il_estaic_ct$phi,theta = il_estaic_ct$theta, n.ahead = 7, lastn= F, limits=T )

f_il_dposr_ct_90 <- fore.aruma.wge(il_dposr, phi = il_estaic_ct$phi,theta = il_estaic_ct$theta, n.ahead = 90, lastn= F, limits=T )

# decide not to work on count
# $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
il_dposr_d1<- artrans.wge(il_dposr,phi.tr = 1)

#second lag is large
aic5.wge(il_dposr_d1,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 4 4 
## Five Smallest Values of  aic
##       p    q        aic
## 18    3    2   11.95551
## 30    5    4   11.96930
## 23    4    2   11.97286
## 19    3    3   11.97557
## 24    4    3   11.97827
# pick 3,2, AIC 11.95
aic5.wge(il_dposr_d1,p=0:10,q=0:4, type='bic')
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 4 4 
## Five Smallest Values of  bic
##       p    q        bic
## 2     0    1   12.06537
## 18    3    2   12.09269
## 7     1    1   12.09311
## 3     0    2   12.09633
## 6     1    0   12.11201
# pick 0, 1 BIC=12.06
# estimate model phi and theta
il_estaic_d1_ct <- est.arma.wge(il_dposr_d1, p=3, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## 0.9036 -0.0164 -0.3605 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.4044B+0.7198B^2    0.9755+-0.6615i      0.8484       0.0948
## 1+0.5008B             -1.9967               0.5008       0.5000
##   
## 
f_il_dposr_d1_ct <- fore.aruma.wge(il_dposr, d=1, phi = il_estaic_d1_ct$phi,theta = il_estaic_d1_ct$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_il_dposr_d1_ct = mean((f_il_dposr_d1_ct$f - il_dposr_d1[length(f_il_dposr_d1_ct$f)-20+1:length(f_il_dposr_d1_ct$f)])^2)
ase_il_dposr_d1_ct
## [1] 599938.6
# ASE 599938.6, much larger, no go
# complete the model, test residual
ljung.wge(f_il_dposr_d1_ct$resid,p=3,q=2)
## Obs 0.001437668 0.02545517 0.0381145 -0.05838254 -0.05299002 0.1743374 -0.03713574 0.02233283 0.1003982 -0.04422496 0.02628858 -0.006758183 0.02240596 0.1133096 0.05248398 0.03981525 0.005025657 -0.03243836 -0.05456907 0.07989902 0.2716167 -0.01212245 -0.1154252 0.04202591
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 24.72814
## 
## $df
## [1] 19
## 
## $pval
## [1] 0.1696867
# pval = 0.1711935
plotts.wge(f_il_dposr_d1_ct$res)

acf(f_il_dposr_d1_ct$resid)

# forecast out 
f_il_dposr_d1_ct_7 <- fore.aruma.wge(il_dposr, d=1, phi = il_estaic_d1_ct$phi,theta = il_estaic_d1_ct$theta, n.ahead = 7, lastn= F, limits=F )

f_il_dposr_d1_ct_90 <- fore.aruma.wge(il_dposr, d=1, phi = il_estaic_d1_ct$phi,theta = il_estaic_d1_ct$theta, n.ahead = 90, lastn= F, limits=F )

# decide not to work on count
############################### start using from May 4th data ############################### 
#IL4 is the dataset ARMA(12,3)
plotts.sample.wge(IL4$posrate)

## $autplt
##  [1] 1.00000000 0.84706216 0.83128805 0.76463795 0.72652098 0.66806587
##  [7] 0.65656404 0.58804632 0.58844427 0.52522989 0.51461061 0.45186464
## [13] 0.40834268 0.40542888 0.37714113 0.35114251 0.31280904 0.27569139
## [19] 0.23423800 0.20997194 0.13705635 0.11281560 0.05964379 0.03460459
## [25] 0.02280035 0.01104549
## 
## $freq
##  [1] 0.01369863 0.02739726 0.04109589 0.05479452 0.06849315 0.08219178
##  [7] 0.09589041 0.10958904 0.12328767 0.13698630 0.15068493 0.16438356
## [13] 0.17808219 0.19178082 0.20547945 0.21917808 0.23287671 0.24657534
## [19] 0.26027397 0.27397260 0.28767123 0.30136986 0.31506849 0.32876712
## [25] 0.34246575 0.35616438 0.36986301 0.38356164 0.39726027 0.41095890
## [31] 0.42465753 0.43835616 0.45205479 0.46575342 0.47945205 0.49315068
## 
## $db
##  [1]  13.4574629   5.1107843   3.3822758   0.6611536   1.1800172  -3.2278316
##  [7]  -5.4642859  -2.6045214  -1.2788882  -7.2501220  -3.5425945 -13.6509335
## [13]  -8.6392366 -11.8799891  -4.1894316  -7.7959631 -10.9367525 -11.7157964
## [19] -11.3206333 -13.0915675  -4.9168784 -17.5880935  -5.6019065 -11.6479085
## [25]  -8.4477905 -21.5028852  -3.8501267  -6.9379366  -7.2994506 -11.0910130
## [31] -17.8571548 -15.8191554 -12.0724100  -8.6386645  -3.4869624  -5.4841390
## 
## $dbz
##  [1]  9.4412192  8.5133298  6.9713549  4.8515255  2.2895332 -0.3637689
##  [7] -2.5643194 -4.0446708 -5.0606566 -5.8974958 -6.6385691 -7.3016628
## [13] -7.8956921 -8.3949553 -8.7882797 -9.1353135 -9.4903053 -9.7860929
## [19] -9.8711977 -9.7046024 -9.4257392 -9.1881866 -9.0243456 -8.8693115
## [25] -8.6712053 -8.4748535 -8.4006542 -8.5580256 -8.9616021 -9.4489462
## [31] -9.6451720 -9.2156500 -8.2827254 -7.2683189 -6.4866336 -6.0746887
il_dposrater = IL4$posrate

# try ARMA first
aic5.wge(il_dposrater,p=0:16,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 13 4 
## Five Smallest Values of  aic
##       p    q        aic
## 64   12    3  0.5271353
## 72   14    1  0.5672285
## 63   12    2  0.5851050
## 61   12    0  0.5896450
## 81   16    0  0.6073353
# pick 12,3, AIC 0.527

# estimate model phi and theta
il_estaic <- est.arma.wge(il_dposrater, p=12, q=3, factor = T)
## 
## Coefficients of Original polynomial:  
## -0.1173 0.1314 -0.0044 0.5008 0.3371 0.5697 0.0008 -0.0157 0.0213 -0.0607 0.0493 -0.4584 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.9460B+0.9740B^2   -0.9989+-0.1697i      0.9869       0.4732
## 1-0.5607B+0.9725B^2    0.2883+-0.9722i      0.9862       0.2041
## 1-1.9185B+0.9203B^2    1.0423+-0.0145i      0.9593       0.0022
## 1+1.2581B+0.8601B^2   -0.7313+-0.7923i      0.9274       0.3686
## 1+0.5071B+0.8318B^2   -0.3048+-1.0532i      0.9120       0.2948
## 1-1.1147B+0.7350B^2    0.7583+-0.8863i      0.8573       0.1374
##   
## 
f_il_dposr <- fore.aruma.wge(il_dposrater, d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_il_dposr = mean((f_il_dposr$f - il_dposrater[(length(il_dposrater)-20+1):length(il_dposrater)])^2)
ase_il_dposr
## [1] 0.7890908
# ASE  0.7890908
# complete the model, test residual
ljung.wge(f_il_dposr$resid,p=12,q=3)
## Obs -0.063869 -0.1135724 0.008518975 -0.07065248 -0.2019947 -0.07951013 0.07520751 -0.1340275 0.02708842 -0.03805784 -0.02526089 0.110918 -0.06004233 0.02460496 0.08762631 0.02826428 0.01172655 0.05007233 0.06996003 -0.09591883 0.0437517 -0.09940294 -0.05619927 -0.0982514
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 14.42298
## 
## $df
## [1] 9
## 
## $pval
## [1] 0.1080547
# pval = 0.1080547 white noise
plotts.wge(f_il_dposr$res)

acf(f_il_dposr$resid)

# forecast out 
f_il_dposr_7 <- fore.aruma.wge(il_dposrater, d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = 7, lastn= F, limits=T )

f_il_dposr_90 <- fore.aruma.wge(il_dposrater, d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = 90, lastn= F, limits=T )

IL5 = ts(IL4$posrate)
#rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
    forecasts = fore.aruma.wge(il_dposrater[i:(i+(trainingSize-1))], d=0, phi = il_estaic$phi,theta = il_estaic$theta, n.ahead = horizon)
   
  ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 0.36411950 0.22329315 0.50961062 0.44589024 0.46011665 0.13418412
##  [7] 0.41841939 0.19013453 0.15894212 0.18353181 0.08915786 0.24691020
## [13] 0.39616815 0.70155439 0.57833722 0.62492572 0.40855099
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.08916 0.19013 0.39617 0.36081 0.46012 0.70155
WindowedASE #0.3608145
## [1] 0.3608145
#diff data (ARIMA(10,2,0))
il_dposrater_d1 <- artrans.wge(il_dposrater,phi.tr = 1)

aic5.wge(il_dposrater_d1,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic
##       p    q        aic
## 38    7    2  0.6273992
## 43    8    2  0.6357281
## 48    9    2  0.6393444
## 53   10    2  0.6606535
## 7     1    1  0.6943154
#pick 7,2, aic=0.627
# estimate model phi and theta
il_estaic_d1 <- est.arma.wge(il_dposrater_d1, p=7, q=2, factor = T)
## 
## Coefficients of Original polynomial:  
## -0.9598 0.2974 0.4454 0.2274 0.0791 0.3177 0.3571 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9684B              1.0326               0.9684       0.0000
## 1+1.8857B+0.9057B^2   -1.0410+-0.1429i      0.9517       0.4783
## 1+0.8562B+0.6956B^2   -0.6154+-1.0290i      0.8340       0.3358
## 1-0.8138B+0.5852B^2    0.6953+-1.1070i      0.7650       0.1607
##   
## 
# do another diff
il_dposrater_d2 <- artrans.wge(il_dposrater_d1,phi.tr = 1)

aic5.wge(il_dposrater_d2,p=0:10,q=0:4)
## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic
##       p    q        aic
## 51   10    0  0.5817741
## 36    7    0  0.8473765
## 31    6    0  0.8482152
## 41    8    0  0.8549106
## 46    9    0  0.8832552
il_estaic_d2 <- est.arma.wge(il_dposrater_d2, p=10, q=0, factor = T)
## 
## Coefficients of Original polynomial:  
## -1.7663 -2.0598 -2.1473 -2.1295 -2.1472 -1.7187 -1.4115 -1.1591 -0.8642 -0.4916 
## 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.8872B+0.9225B^2   -1.0228+-0.1943i      0.9605       0.4701
## 1+1.2660B+0.8951B^2   -0.7072+-0.7856i      0.9461       0.3667
## 1-0.6150B+0.8627B^2    0.3565+-1.0159i      0.9288       0.1963
## 1+0.5744B+0.8524B^2   -0.3369+-1.0294i      0.9232       0.3003
## 1-1.3462B+0.8096B^2    0.8314+-0.7375i      0.8998       0.1155
##   
## 
f_il_dposr_d2 <- fore.aruma.wge(il_dposrater, d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = 20, lastn= T, limits=F )

# ASE
ase_il_dposr_d2 = mean((f_il_dposr_d2$f - il_dposrater[(length(il_dposrater)-20+1):length(il_dposrater)])^2)
ase_il_dposr_d2
## [1] 0.3281278
# ASE  30.39611
# complete the model, test residual
ljung.wge(f_il_dposr_d2$resid,p=10,q=0)
## Obs 0.09210403 -0.07900541 0.139213 -0.1145658 -0.2052481 -0.1140479 0.04034315 -0.2857225 -0.1541804 -0.1375917 -0.06348626 0.1214589 -0.01428639 0.2038236 0.1721987 -0.03289784 -0.02268816 0.2011555 0.04370681 -0.06099152 0.03421798 -0.1612467 -0.1604852 -0.09692866
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 38.72615
## 
## $df
## [1] 14
## 
## $pval
## [1] 0.0004019996
# pval = 0.000402, NOT white noise
plotts.wge(f_il_dposr_d2$res)

acf(f_il_dposr_d2$resid)

# forecast out 
f_il_dposr_d2_7 <- fore.aruma.wge(il_dposrater, d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = 7, lastn= F, limits=T )

f_il_dposr_d2_90 <- fore.aruma.wge(il_dposrater, d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = 90, lastn= F, limits=T )

#rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
    forecasts = fore.aruma.wge(il_dposrater[i:(i+(trainingSize-1))], d=2, phi = il_estaic_d2$phi,theta = il_estaic_d2$theta, n.ahead = horizon)
   
  ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 0.56210206 0.14608297 0.15136368 0.16864294 0.15309334 0.08232689
##  [7] 0.26140242 0.06814249 0.04551926 0.08659063 0.08965176 0.32734587
## [13] 0.64508697 0.79941523 0.66215248 0.82064134 0.66171876
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04552 0.08965 0.16864 0.33713 0.64509 0.82064
WindowedASE #0.3371341
## [1] 0.3371341

VAR, MLP, Rolling ASE for IL

#IL4 is the dataset
#hospitalizedCurrently  0.91
#deathIncrease    0.634
#inIcuCurrently   0.882
#onVent                 0.872
#negativeIncrease       -0.654
# we'll move forward for multi variants analysis factor these 3 variables
ccf(IL4$posrate,IL4$hospitalized)

ccf(IL4$posrate,IL4$deathIncrease)

ccf(IL4$posrate,IL4$inIcu)

ccf(IL4$posrate,IL4$onVent)

ccf(IL4$posrate,IL4$negativeIncrease)

IL4_try4=data.frame(posrate=IL4$posrate,hospitalized=IL4$hospitalized)
IL4_try2=data.frame(posrate=IL4$posrate,deathIncrease=IL4$deathIncrease)
IL4_try3=data.frame(posrate=IL4$posrate,inIcu=IL4$inIcu,onVent=IL4$onVent)
IL4_try5=data.frame(posrate=IL4$posrate,negativeIncrease=IL4$negativeIncrease)

set.seed(1)
VARselect(IL4_try4,type='both',lag.max=16) #7, many Infs
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     15      1      1     15 
## 
## $criteria
##                   1            2            3            4            5
## AIC(n)     9.628964     9.630885     9.613715     9.622478     9.657919
## HQ(n)      9.740402     9.798043     9.836592     9.901075     9.992235
## SC(n)      9.915708    10.061001    10.187203    10.339338    10.518151
## FPE(n) 15205.700180 15251.767560 15024.615986 15211.606633 15846.465970
##                   6            7            8            9           10
## AIC(n)     9.694604     9.526542     9.519532     9.526003     9.529141
## HQ(n)     10.084639     9.972296    10.021005    10.083196    10.142053
## SC(n)     10.698208    10.673518    10.809880    10.959723    11.106233
## FPE(n) 16565.992283 14150.567143 14245.940099 14592.594110 14965.172667
##                  11           12           13           14          15
## AIC(n)     9.463028     9.370639     9.286496     9.239543    8.854342
## HQ(n)     10.131659    10.094989    10.066565    10.075332    9.745850
## SC(n)     11.183493    11.234475    11.293704    11.390124   11.148295
## FPE(n) 14395.934386 13572.270529 12994.666942 13022.355567 9397.974035
##                  16
## AIC(n)     8.919704
## HQ(n)      9.866932
## SC(n)     11.357029
## FPE(n) 10769.465194
lsfit_IL4=VAR(IL4_try4, p=15, type ='both')################try to put few variables in xtrgen instead?

# short term forecast VAR US
preds_IL4=predict(lsfit_IL4,n.ahead=7)
preds_IL4$fcst$posrate[1:7,1]
## [1] 3.225589 4.788572 2.936862 4.454516 3.585940 3.802660 2.588652
# making graph for 7 ahead prediction
plot(IL4$posrate, type="o",xlim=c(1,81),ylim=c(1,15))
points(seq(74,80,1),preds_IL4$fcst$posrate[1:7,1], type='o',col="blue")

# long term forecast VAR US
preds_IL4L=predict(lsfit_IL4,n.ahead=90)
preds_IL4L$fcst$posrate[1:90,1]
##  [1] 3.225589 4.788572 2.936862 4.454516 3.585940 3.802660 2.588652 3.219270
##  [9] 3.275490 3.991660 3.955019 4.056987 3.401566 3.103426 3.956585 3.481761
## [17] 4.303974 3.731541 4.422370 3.600931 3.838421 3.216895 3.802075 3.752772
## [25] 4.220904 4.204332 3.918949 3.764884 3.644447 4.174983 3.996579 4.495958
## [33] 4.191006 4.407883 3.811853 4.267713 3.984396 4.393886 4.454797 4.668526
## [41] 4.507530 4.327197 4.443755 4.428838 4.808730 4.797175 5.071262 4.703751
## [49] 4.877480 4.644730 4.973739 4.924364 5.240909 5.209972 5.237510 5.122357
## [57] 5.105144 5.263664 5.315188 5.610734 5.530140 5.625867 5.378910 5.551237
## [65] 5.484604 5.742452 5.766311 5.924127 5.815645 5.808306 5.799436 5.844240
## [73] 6.007485 6.077398 6.223652 6.099318 6.152542 6.042116 6.205597 6.224035
## [81] 6.427576 6.414783 6.456755 6.385807 6.409186 6.465100 6.552707 6.693191
## [89] 6.717242 6.768358
# making graph for 90 ahead prediction
plot(IL4$posrate, type="o",xlim=c(1,164),ylim=c(1,16))
points(seq(74,163,1),preds_IL4L$fcst$posrate[1:90,1], type='o',col="blue")

# looks flat and a bit following the upwards trend

# Rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
  
    ILp = IL4[i:(i+trainingSize),]
    lsfit_IL4 = VAR(ILp,p=6)
    forecasts = predict(lsfit_IL4,n.ahead = horizon)
  
  ASE = mean((IL4$posrate[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$fcst$posrate)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder
##  [1] 15.391349 24.153240 29.111119  9.741131  9.606046  8.875742  7.274734
##  [8]  7.724953 11.392253 10.752786 11.021295  7.644551  6.838913  6.531442
## [15]  6.281423  4.852618  4.097239
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.097   6.839   8.876  10.664  11.021  29.111
WindowedASE #10.66417
## [1] 10.66417
IL5 = ts(IL4$posrate)
library(nnfor)
set.seed(5)
fitmlp_ILu=mlp(IL5,rep=100)

# short term forecast MLP uni US
f_fitmlp_ILu=forecast(fitmlp_ILu,h=7)
plot(f_fitmlp_ILu)

# long term forecasst MLP uni us
f_fitmlp_ILuL=forecast(fitmlp_ILu,h=90)
plot(f_fitmlp_ILuL)

# do a better job at visulize the 90 day forecast
plot(IL5, type = "l",xlim=c(1,163),ylim=c(1,17))
lines(seq(74,163,1),f_fitmlp_ILuL$mean,col = "blue")

# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
  
    ILp = ts(IL5[i:(i+trainingSize)])
    fitmlp_IL5 = mlp(ILp,rep=25)
    forecasts = forecast(fitmlp_IL5,h = horizon)
  
  ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 0.22796774 0.21705668 0.16349143 0.08359748 0.09554394 0.04631433
##  [7] 0.05058953 0.10353288 0.12407947 0.16129188 0.23508583 0.18022154
## [13] 0.86000455 0.62843868 0.79646606 0.67404134 0.34066257
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04631 0.10353 0.18022 0.29343 0.34066 0.86000
WindowedASE #0.2934345
## [1] 0.2934345
#short term
ensemble_IL_uni = (f_fitmlp_ILu$mean + f_il_dposr_d2_7$f +f_il_dposr_7$f)/3
plot(IL5, type = "l",xlim=c(1,81),ylim=c(1,12))
lines(seq(74,80,1),ensemble_IL_uni,col = "blue")

#long term
ensemble_IL_uniL = (f_fitmlp_ILuL$mean + f_il_dposr_d2_90$f +f_il_dposr_90$f)/3
plot(IL5, type = "l",xlim=c(1,163),ylim=c(1,12))
lines(seq(74,163,1),ensemble_IL_uniL,col = "blue")

set.seed(2)
# forecast regressor value in order to MLP posrate forecast into the future......short term
hospitalized = ts(IL4$hospitalized)
fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized7=forecast(fitmlp_hospitalized7,h=7)
plot(f_fitmlp_hospitalized7)

hospitalized7 = ts(c(IL4$hospitalized, f_fitmlp_hospitalized7$mean))

deathIncrease = ts(IL4$deathIncrease)
fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease7=forecast(fitmlp_deathIncrease7,h=7)
plot(f_fitmlp_deathIncrease7)

deathIncrease7 = ts(c(IL4$deathIncrease, f_fitmlp_deathIncrease7$mean))

inIcu = ts(IL4$inIcu)
fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu7=forecast(fitmlp_inIcu7,h=7)
plot(f_fitmlp_inIcu7)

inIcu7 = ts(c(IL4$inIcu, f_fitmlp_inIcu7$mean))

onVent = ts(IL4$onVent)
fitmlp_onVent7=mlp(onVent,rep=30)
f_fitmlp_onVent7=forecast(fitmlp_onVent7,h=7)
plot(f_fitmlp_onVent7)

onVent7 = ts(c(IL4$onVent, f_fitmlp_onVent7$mean))

negativeIncrease = ts(IL4$negativeIncrease)
fitmlp_negativeIncrease7=mlp(negativeIncrease,rep=30)
f_fitmlp_negativeIncrease7=forecast(fitmlp_negativeIncrease7,h=7)
plot(f_fitmlp_negativeIncrease7)

negativeIncrease7 = ts(c(IL4$negativeIncrease, f_fitmlp_negativeIncrease7$mean))
# setup regressor df
IL6_old = data.frame(hospitalized=hospitalized7,deathIncrease=deathIncrease7,inIcu=inIcu7,onVent=onVent7,negativeIncrease=negativeIncrease7)

# lon term forecast is over 100% half way, try to adjust xreg
IL6 = data.frame(hospitalized=hospitalized7,deathIncrease=deathIncrease7,inIcu=inIcu7,onVent=onVent7)

fitmlp_ILm=mlp(IL5,rep=30, xreg=IL6)
fitmlp_ILm
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (2,3,4)
## Forecast combined using the median operator.
## MSE: 0.2944.
f_fitmlp_ILm=forecast(fitmlp_ILm,h=7,xreg =IL6)
plot(f_fitmlp_ILm)

# forecast regressor value in order to MLP posrate forecast into the future.. long term
#hospitalized = ts(IL4$hospitalized)
#fitmlp_hospitalized7=mlp(hospitalized,rep=30)
f_fitmlp_hospitalized90=forecast(fitmlp_hospitalized7,h=90)
plot(f_fitmlp_hospitalized90)

hospitalized90 = ts(c(IL4$hospitalized, f_fitmlp_hospitalized90$mean))

#deathIncrease = ts(IL4$deathIncrease)
#fitmlp_deathIncrease7=mlp(deathIncrease,rep=30)
f_fitmlp_deathIncrease90=forecast(fitmlp_deathIncrease7,h=90)
plot(f_fitmlp_deathIncrease7)

deathIncrease90 = ts(c(IL4$deathIncrease, f_fitmlp_deathIncrease90$mean))

#inIcu = ts(IL4$inIcu)
#fitmlp_inIcu7=mlp(inIcu,rep=30)
f_fitmlp_inIcu90=forecast(fitmlp_inIcu7,h=90)
plot(f_fitmlp_inIcu90)

inIcu90 = ts(c(IL4$inIcu, f_fitmlp_inIcu90$mean))

#onVent = ts(IL4$onVent)
#fitmlp_onVent7=mlp(onVent,rep=30)
f_fitmlp_onVent90=forecast(fitmlp_onVent7,h=90)
plot(f_fitmlp_onVent90)

onVent90 = ts(c(IL4$onVent, f_fitmlp_onVent90$mean))

#negativeIncrease = ts(IL4$negativeIncrease)
#fitmlp_negativeIncrease7=mlp(negativeIncrease,rep=30)
f_fitmlp_negativeIncrease90=forecast(fitmlp_negativeIncrease7,h=90)
plot(f_fitmlp_negativeIncrease90)

negativeIncrease90 = ts(c(IL4$negativeIncrease, f_fitmlp_negativeIncrease90$mean))
# setup regressor df
IL6L_old = data.frame(hospitalized=hospitalized90,deathIncrease=deathIncrease90,inIcu=inIcu90,onVent=onVent90,negativeIncrease=negativeIncrease90)

IL6L = data.frame(hospitalized=hospitalized90,deathIncrease=deathIncrease90,inIcu=inIcu90,onVent=onVent90)

fitmlp_ILmL=mlp(IL5,rep=30, xreg=IL6L)
fitmlp_ILmL
## MLP fit with 5 hidden nodes and 30 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (2,3,4)
## Forecast combined using the median operator.
## MSE: 0.3589.
f_fitmlp_ILmL=forecast(fitmlp_ILmL,h=90,xreg =IL6L)
plot(f_fitmlp_ILmL)

# fix negative rate forecast, set to zero
for (i in 1:length(f_fitmlp_ILmL$mean)) {
    if (f_fitmlp_ILmL$mean[i]<0.0) f_fitmlp_ILmL$mean[i]=0.0
    }

# try plot the mean and original data, make the graph more visible. Otherwise too wide of different reps, make just a flat line. 
plot(IL5, type = "l",xlim=c(1,163),ylim=c(0,15))
lines(seq(74,163,1),f_fitmlp_ILmL$mean,col = "blue")

# rolling ASE
trainingSize = 50
horizon = 7
ASEHolder = numeric()

for( i in 1:(73-(trainingSize + horizon) + 1))
{
  
    ILp = ts(IL5[i:(i+trainingSize)])
    ILR = IL6[i:(i+trainingSize+horizon),]
    fitmlp_IL6 = mlp(ILp,rep=20,xreg=ILR)
    forecasts = forecast(fitmlp_IL6,h = horizon,xreg=ILR)
  
  ASE = mean((IL5[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$mean)^2)
  
  ASEHolder[i] = ASE
  
}

ASEHolder #  taking 10 mins to run
##  [1] 0.09046104 0.63251903 0.31769679 0.42378685 0.13899786 0.39011539
##  [7] 0.20221401 0.11334207 0.12992757 0.13298685 0.26756311 0.30200406
## [13] 0.68266885 0.34401345 0.32363498 0.35984946 0.34144762
hist(ASEHolder)

WindowedASE = mean(ASEHolder)

summary(ASEHolder)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.09046 0.13900 0.31770 0.30548 0.35985 0.68267
WindowedASE #0.3054841
## [1] 0.3054841
#short term
ensemble_IL = (f_fitmlp_ILm$mean + preds_IL4$fcst$posrate[,1])/2
plot(IL5, type = "l",xlim=c(1,81),ylim=c(1,12))
lines(seq(74,80,1),ensemble_IL,col = "blue")

#long term
ensemble_ILL = (f_fitmlp_ILmL$mean + preds_IL4L$fcst$posrate[,1])/2
plot(IL5, type = "l",xlim=c(1,163),ylim=c(1,12))
lines(seq(74,163,1),ensemble_ILL,col = "blue")